LLVM  17.0.0git
PPCISelLowering.cpp
Go to the documentation of this file.
1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/StringSwitch.h"
57 #include "llvm/IR/CallingConv.h"
58 #include "llvm/IR/Constant.h"
59 #include "llvm/IR/Constants.h"
60 #include "llvm/IR/DataLayout.h"
61 #include "llvm/IR/DebugLoc.h"
62 #include "llvm/IR/DerivedTypes.h"
63 #include "llvm/IR/Function.h"
64 #include "llvm/IR/GlobalValue.h"
65 #include "llvm/IR/IRBuilder.h"
66 #include "llvm/IR/Instructions.h"
67 #include "llvm/IR/Intrinsics.h"
68 #include "llvm/IR/IntrinsicsPowerPC.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/Value.h"
73 #include "llvm/MC/MCContext.h"
74 #include "llvm/MC/MCExpr.h"
75 #include "llvm/MC/MCRegisterInfo.h"
76 #include "llvm/MC/MCSectionXCOFF.h"
77 #include "llvm/MC/MCSymbolXCOFF.h"
80 #include "llvm/Support/Casting.h"
81 #include "llvm/Support/CodeGen.h"
83 #include "llvm/Support/Compiler.h"
84 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/Format.h"
87 #include "llvm/Support/KnownBits.h"
93 #include <algorithm>
94 #include <cassert>
95 #include <cstdint>
96 #include <iterator>
97 #include <list>
98 #include <optional>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
125  "ppc-quadword-atomics",
126  cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
127  cl::Hidden);
128 
129 static cl::opt<bool>
130  DisablePerfectShuffle("ppc-disable-perfect-shuffle",
131  cl::desc("disable vector permute decomposition"),
132  cl::init(true), cl::Hidden);
133 
135  "disable-auto-paired-vec-st",
136  cl::desc("disable automatically generated 32byte paired vector stores"),
137  cl::init(true), cl::Hidden);
138 
139 STATISTIC(NumTailCalls, "Number of tail calls");
140 STATISTIC(NumSiblingCalls, "Number of sibling calls");
141 STATISTIC(ShufflesHandledWithVPERM,
142  "Number of shuffles lowered to a VPERM or XXPERM");
143 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
144 
145 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
146 
147 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
148 
149 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
150 
151 // FIXME: Remove this once the bug has been fixed!
153 
155  const PPCSubtarget &STI)
156  : TargetLowering(TM), Subtarget(STI) {
157  // Initialize map that relates the PPC addressing modes to the computed flags
158  // of a load/store instruction. The map is used to determine the optimal
159  // addressing mode when selecting load and stores.
160  initializeAddrModeMap();
161  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
162  // arguments are at least 4/8 bytes aligned.
163  bool isPPC64 = Subtarget.isPPC64();
164  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
165 
166  // Set up the register classes.
167  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
168  if (!useSoftFloat()) {
169  if (hasSPE()) {
170  addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
171  // EFPU2 APU only supports f32
172  if (!Subtarget.hasEFPU2())
173  addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
174  } else {
175  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
176  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
177  }
178  }
179 
180  // Match BITREVERSE to customized fast code sequence in the td file.
183 
184  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
186 
187  // Custom lower inline assembly to check for special registers.
190 
191  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
192  for (MVT VT : MVT::integer_valuetypes()) {
195  }
196 
197  if (Subtarget.isISA3_0()) {
202  } else {
203  // No extending loads from f16 or HW conversions back and forth.
212  }
213 
215 
216  // PowerPC has pre-inc load and store's.
227  if (!Subtarget.hasSPE()) {
232  }
233 
234  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
235  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
236  for (MVT VT : ScalarIntVTs) {
241  }
242 
243  if (Subtarget.useCRBits()) {
245 
246  if (isPPC64 || Subtarget.hasFPCVT()) {
249  isPPC64 ? MVT::i64 : MVT::i32);
252  isPPC64 ? MVT::i64 : MVT::i32);
253 
256  isPPC64 ? MVT::i64 : MVT::i32);
259  isPPC64 ? MVT::i64 : MVT::i32);
260 
263  isPPC64 ? MVT::i64 : MVT::i32);
266  isPPC64 ? MVT::i64 : MVT::i32);
267 
270  isPPC64 ? MVT::i64 : MVT::i32);
273  isPPC64 ? MVT::i64 : MVT::i32);
274  } else {
279  }
280 
281  // PowerPC does not support direct load/store of condition registers.
284 
285  // FIXME: Remove this once the ANDI glue bug is fixed:
286  if (ANDIGlueBug)
288 
289  for (MVT VT : MVT::integer_valuetypes()) {
293  }
294 
295  addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
296  }
297 
298  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
299  // PPC (the libcall is not available).
304 
305  // We do not currently implement these libm ops for PowerPC.
312 
313  // PowerPC has no SREM/UREM instructions unless we are on P9
314  // On P9 we may use a hardware instruction to compute the remainder.
315  // When the result of both the remainder and the division is required it is
316  // more efficient to compute the remainder from the result of the division
317  // rather than use the remainder instruction. The instructions are legalized
318  // directly because the DivRemPairsPass performs the transformation at the IR
319  // level.
320  if (Subtarget.isISA3_0()) {
325  } else {
330  }
331 
332  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
341 
342  // Handle constrained floating-point operations of scalar.
343  // TODO: Handle SPE specific operation.
349 
354 
355  if (!Subtarget.hasSPE()) {
358  }
359 
360  if (Subtarget.hasVSX()) {
363  }
364 
365  if (Subtarget.hasFSQRT()) {
368  }
369 
370  if (Subtarget.hasFPRND()) {
375 
380  }
381 
382  // We don't support sin/cos/sqrt/fmod/pow
393 
394  // MASS transformation for LLVM intrinsics with replicating fast-math flag
395  // to be consistent to PPCGenScalarMASSEntries pass
396  if (TM.getOptLevel() == CodeGenOpt::Aggressive) {
409  }
410 
411  if (Subtarget.hasSPE()) {
414  } else {
417  }
418 
419  if (Subtarget.hasSPE())
421 
423 
424  // If we're enabling GP optimizations, use hardware square root
425  if (!Subtarget.hasFSQRT() &&
426  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
427  Subtarget.hasFRE()))
429 
430  if (!Subtarget.hasFSQRT() &&
431  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
432  Subtarget.hasFRES()))
434 
435  if (Subtarget.hasFCPSGN()) {
438  } else {
441  }
442 
443  if (Subtarget.hasFPRND()) {
448 
453  }
454 
455  // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
456  // instruction xxbrd to speed up scalar BSWAP64.
457  if (Subtarget.isISA3_1()) {
460  } else {
464  (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
465  }
466 
467  // CTPOP or CTTZ were introduced in P8/P9 respectively
468  if (Subtarget.isISA3_0()) {
471  } else {
474  }
475 
476  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
479  } else {
482  }
483 
484  // PowerPC does not have ROTR
487 
488  if (!Subtarget.useCRBits()) {
489  // PowerPC does not have Select
494  }
495 
496  // PowerPC wants to turn select_cc of FP into fsel when possible.
499 
500  // PowerPC wants to optimize integer setcc a bit
501  if (!Subtarget.useCRBits())
503 
504  if (Subtarget.hasFPU()) {
508 
512  }
513 
514  // PowerPC does not have BRCOND which requires SetCC
515  if (!Subtarget.useCRBits())
517 
519 
520  if (Subtarget.hasSPE()) {
521  // SPE has built-in conversions
528 
529  // SPE supports signaling compare of f32/f64.
532  } else {
533  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
536 
537  // PowerPC does not have [U|S]INT_TO_FP
542  }
543 
544  if (Subtarget.hasDirectMove() && isPPC64) {
549  if (TM.Options.UnsafeFPMath) {
558  }
559  } else {
564  }
565 
566  // We cannot sextinreg(i1). Expand to shifts.
568 
569  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
570  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
571  // support continuation, user-level threading, and etc.. As a result, no
572  // other SjLj exception interfaces are implemented and please don't build
573  // your own exception handling based on them.
574  // LLVM/Clang supports zero-cost DWARF exception handling.
577 
578  // We want to legalize GlobalAddress and ConstantPool nodes into the
579  // appropriate instructions to materialize the address.
590 
591  // TRAP is legal.
593 
594  // TRAMPOLINE is custom lowered.
597 
598  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
600 
601  if (Subtarget.is64BitELFABI()) {
602  // VAARG always uses double-word chunks, so promote anything smaller.
612  } else if (Subtarget.is32BitELFABI()) {
613  // VAARG is custom lowered with the 32-bit SVR4 ABI.
616  } else
618 
619  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
620  if (Subtarget.is32BitELFABI())
622  else
624 
625  // Use the default implementation.
635 
636  // We want to custom lower some of our intrinsics.
642 
643  // To handle counter-based loop conditions.
645 
650 
651  // Comparisons that require checking two conditions.
652  if (Subtarget.hasSPE()) {
657  }
670 
673 
674  if (Subtarget.has64BitSupport()) {
675  // They also have instructions for converting between i64 and fp.
684  // This is just the low 32 bits of a (signed) fp->i64 conversion.
685  // We cannot do this with Promote because i64 is not a legal type.
688 
689  if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
692  }
693  } else {
694  // PowerPC does not have FP_TO_UINT on 32-bit implementations.
695  if (Subtarget.hasSPE()) {
698  } else {
701  }
702  }
703 
704  // With the instructions enabled under FPCVT, we can do everything.
705  if (Subtarget.hasFPCVT()) {
706  if (Subtarget.has64BitSupport()) {
715  }
716 
725  }
726 
727  if (Subtarget.use64BitRegs()) {
728  // 64-bit PowerPC implementations can support i64 types directly
729  addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
730  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
732  // 64-bit PowerPC wants to expand i128 shifts itself.
736  } else {
737  // 32-bit PowerPC wants to expand i64 shifts itself.
741  }
742 
743  // PowerPC has better expansions for funnel shifts than the generic
744  // TargetLowering::expandFunnelShift.
745  if (Subtarget.has64BitSupport()) {
748  }
751 
752  if (Subtarget.hasVSX()) {
757  }
758 
759  if (Subtarget.hasAltivec()) {
760  for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
765  }
766  // First set operation action for all vector types to expand. Then we
767  // will selectively turn on ones that can be effectively codegen'd.
768  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
769  // add/sub are legal for all supported vector VT's.
772 
773  // For v2i64, these are only valid with P8Vector. This is corrected after
774  // the loop.
775  if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
780  }
781  else {
786  }
787 
788  if (Subtarget.hasVSX()) {
791  }
792 
793  // Vector instructions introduced in P8
794  if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
797  }
798  else {
801  }
802 
803  // Vector instructions introduced in P9
804  if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
806  else
808 
809  // We promote all shuffles to v16i8.
812 
813  // We promote all non-typed operations to v4i32.
829 
830  // No other operations are legal.
868 
869  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
870  setTruncStoreAction(VT, InnerVT, Expand);
871  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
872  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
873  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
874  }
875  }
877  if (!Subtarget.hasP8Vector()) {
882  }
883 
884  // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
885  // with merges, splats, etc.
887 
888  // Vector truncates to sub-word integer that fit in an Altivec/VSX register
889  // are cheap, so handle them before they get expanded to scalar.
895 
901  Subtarget.useCRBits() ? Legal : Expand);
915 
916  // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
918  // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
919  if (Subtarget.hasAltivec())
920  for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
922  // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
923  if (Subtarget.hasP8Altivec())
925 
926  addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
927  addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
928  addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
929  addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
930 
933 
934  if (Subtarget.hasVSX()) {
938  }
939 
940  if (Subtarget.hasP8Altivec())
942  else
944 
945  if (Subtarget.isISA3_1()) {
964  }
965 
968 
971 
976 
977  // Altivec does not contain unordered floating-point compare instructions
982 
983  if (Subtarget.hasVSX()) {
986  if (Subtarget.hasP8Vector()) {
989  }
990  if (Subtarget.hasDirectMove() && isPPC64) {
999  }
1001 
1002  // The nearbyint variants are not allowed to raise the inexact exception
1003  // so we can only code-gen them with unsafe math.
1004  if (TM.Options.UnsafeFPMath) {
1007  }
1008 
1017 
1023 
1026 
1029 
1030  // Share the Altivec comparison restrictions.
1035 
1038 
1040 
1041  if (Subtarget.hasP8Vector())
1042  addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1043 
1044  addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1045 
1046  addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1047  addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1048  addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1049 
1050  if (Subtarget.hasP8Altivec()) {
1054 
1055  // 128 bit shifts can be accomplished via 3 instructions for SHL and
1056  // SRL, but not for SRA because of the instructions available:
1057  // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1058  // doing
1062 
1064  }
1065  else {
1069 
1071 
1072  // VSX v2i64 only supports non-arithmetic operations.
1075  }
1076 
1077  if (Subtarget.isISA3_1())
1079  else
1081 
1086 
1088 
1097 
1098  // Custom handling for partial vectors of integers converted to
1099  // floating point. We already have optimal handling for v2i32 through
1100  // the DAG combine, so those aren't necessary.
1117 
1124 
1127 
1128  // Handle constrained floating-point operations of vector.
1129  // The predictor is `hasVSX` because altivec instruction has
1130  // no exception but VSX vector instruction has.
1144 
1158 
1159  addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1160  addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1161 
1162  for (MVT FPT : MVT::fp_valuetypes())
1164 
1165  // Expand the SELECT to SELECT_CC
1167 
1170 
1171  // No implementation for these ops for PowerPC.
1177  }
1178 
1179  if (Subtarget.hasP8Altivec()) {
1180  addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1181  addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1182  }
1183 
1184  if (Subtarget.hasP9Vector()) {
1187 
1188  // 128 bit shifts can be accomplished via 3 instructions for SHL and
1189  // SRL, but not for SRA because of the instructions available:
1190  // VS{RL} and VS{RL}O.
1194 
1200 
1208 
1215 
1219 
1220  // Handle constrained floating-point operations of fp128
1241  } else if (Subtarget.hasVSX()) {
1244 
1247 
1248  // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1249  // fp_to_uint and int_to_fp.
1252 
1260 
1261  // Expand the fp_extend if the target type is fp128.
1264 
1265  // Expand the fp_round if the source type is fp128.
1266  for (MVT VT : {MVT::f32, MVT::f64}) {
1269  }
1270 
1275 
1276  // Lower following f128 select_cc pattern:
1277  // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1279 
1280  // We need to handle f128 SELECT_CC with integer result type.
1283  }
1284 
1285  if (Subtarget.hasP9Altivec()) {
1286  if (Subtarget.isISA3_1()) {
1291  } else {
1294  }
1302  }
1303 
1304  if (Subtarget.hasP10Vector()) {
1306  }
1307  }
1308 
1309  if (Subtarget.pairedVectorMemops()) {
1310  addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1313  }
1314  if (Subtarget.hasMMA()) {
1315  if (Subtarget.isISAFuture())
1316  addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1317  else
1318  addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1322  }
1323 
1324  if (Subtarget.has64BitSupport())
1326 
1327  if (Subtarget.isISA3_1())
1329 
1331 
1332  if (!isPPC64) {
1335  }
1336 
1341  }
1342 
1344 
1345  if (Subtarget.hasAltivec()) {
1346  // Altivec instructions set fields to all zeros or all ones.
1348  }
1349 
1350  setLibcallName(RTLIB::MULO_I128, nullptr);
1351  if (!isPPC64) {
1352  // These libcalls are not available in 32-bit.
1353  setLibcallName(RTLIB::SHL_I128, nullptr);
1354  setLibcallName(RTLIB::SRL_I128, nullptr);
1355  setLibcallName(RTLIB::SRA_I128, nullptr);
1356  setLibcallName(RTLIB::MUL_I128, nullptr);
1357  setLibcallName(RTLIB::MULO_I64, nullptr);
1358  }
1359 
1360  if (!isPPC64)
1362  else if (shouldInlineQuadwordAtomics())
1364  else
1366 
1367  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1368 
1369  // We have target-specific dag combine patterns for the following nodes:
1372  if (Subtarget.hasFPCVT())
1375  if (Subtarget.useCRBits())
1379 
1381 
1383 
1384  if (Subtarget.useCRBits()) {
1386  }
1387 
1388  if (Subtarget.hasP9Altivec()) {
1390  }
1391 
1392  setLibcallName(RTLIB::LOG_F128, "logf128");
1393  setLibcallName(RTLIB::LOG2_F128, "log2f128");
1394  setLibcallName(RTLIB::LOG10_F128, "log10f128");
1395  setLibcallName(RTLIB::EXP_F128, "expf128");
1396  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1397  setLibcallName(RTLIB::SIN_F128, "sinf128");
1398  setLibcallName(RTLIB::COS_F128, "cosf128");
1399  setLibcallName(RTLIB::POW_F128, "powf128");
1400  setLibcallName(RTLIB::FMIN_F128, "fminf128");
1401  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1402  setLibcallName(RTLIB::REM_F128, "fmodf128");
1403  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1404  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1405  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1406  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1407  setLibcallName(RTLIB::ROUND_F128, "roundf128");
1408  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1409  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1410  setLibcallName(RTLIB::RINT_F128, "rintf128");
1411  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1412  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1413  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1414  setLibcallName(RTLIB::FMA_F128, "fmaf128");
1415 
1416  // With 32 condition bits, we don't need to sink (and duplicate) compares
1417  // aggressively in CodeGenPrep.
1418  if (Subtarget.useCRBits()) {
1421  }
1422 
1424 
1425  switch (Subtarget.getCPUDirective()) {
1426  default: break;
1427  case PPC::DIR_970:
1428  case PPC::DIR_A2:
1429  case PPC::DIR_E500:
1430  case PPC::DIR_E500mc:
1431  case PPC::DIR_E5500:
1432  case PPC::DIR_PWR4:
1433  case PPC::DIR_PWR5:
1434  case PPC::DIR_PWR5X:
1435  case PPC::DIR_PWR6:
1436  case PPC::DIR_PWR6X:
1437  case PPC::DIR_PWR7:
1438  case PPC::DIR_PWR8:
1439  case PPC::DIR_PWR9:
1440  case PPC::DIR_PWR10:
1441  case PPC::DIR_PWR_FUTURE:
1444  break;
1445  }
1446 
1447  if (Subtarget.enableMachineScheduler())
1449  else
1451 
1453 
1454  // The Freescale cores do better with aggressive inlining of memcpy and
1455  // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1456  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1457  Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1458  MaxStoresPerMemset = 32;
1460  MaxStoresPerMemcpy = 32;
1462  MaxStoresPerMemmove = 32;
1464  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1465  // The A2 also benefits from (very) aggressive inlining of memcpy and
1466  // friends. The overhead of a the function call, even when warm, can be
1467  // over one hundred cycles.
1468  MaxStoresPerMemset = 128;
1469  MaxStoresPerMemcpy = 128;
1470  MaxStoresPerMemmove = 128;
1471  MaxLoadsPerMemcmp = 128;
1472  } else {
1473  MaxLoadsPerMemcmp = 8;
1475  }
1476 
1477  IsStrictFPEnabled = true;
1478 
1479  // Let the subtarget (CPU) decide if a predictable select is more expensive
1480  // than the corresponding branch. This information is used in CGP to decide
1481  // when to convert selects into branches.
1483 }
1484 
1485 // *********************************** NOTE ************************************
1486 // For selecting load and store instructions, the addressing modes are defined
1487 // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1488 // patterns to match the load the store instructions.
1489 //
1490 // The TD definitions for the addressing modes correspond to their respective
1491 // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1492 // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1493 // address mode flags of a particular node. Afterwards, the computed address
1494 // flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1495 // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1496 // accordingly, based on the preferred addressing mode.
1497 //
1498 // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1499 // MemOpFlags contains all the possible flags that can be used to compute the
1500 // optimal addressing mode for load and store instructions.
1501 // AddrMode contains all the possible load and store addressing modes available
1502 // on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1503 //
1504 // When adding new load and store instructions, it is possible that new address
1505 // flags may need to be added into MemOpFlags, and a new addressing mode will
1506 // need to be added to AddrMode. An entry of the new addressing mode (consisting
1507 // of the minimal and main distinguishing address flags for the new load/store
1508 // instructions) will need to be added into initializeAddrModeMap() below.
1509 // Finally, when adding new addressing modes, the getAddrModeForFlags() will
1510 // need to be updated to account for selecting the optimal addressing mode.
1511 // *****************************************************************************
1512 /// Initialize the map that relates the different addressing modes of the load
1513 /// and store instructions to a set of flags. This ensures the load/store
1514 /// instruction is correctly matched during instruction selection.
1515 void PPCTargetLowering::initializeAddrModeMap() {
// NOTE(review): this listing has dropped the MemOpFlags initializer entries
// that belong under each per-instruction comment below (source line numbers
// jump, e.g. 1517 -> 1522); only the labels survive. Recover the flag lists
// from the original file before editing this function.
1516  AddrModesMap[PPC::AM_DForm] = {
1517  // LWZ, STW
1522  // LBZ, LHZ, STB, STH
1527  // LHA
1532  // LFS, LFD, STFS, STFD
1537  };
1538  AddrModesMap[PPC::AM_DSForm] = {
1539  // LWA
1543  // LD, STD
1547  // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1551  };
1552  AddrModesMap[PPC::AM_DQForm] = {
1553  // LXV, STXV
1557  };
1558  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1560  // TODO: Add mapping for quadword load/store.
1561 }
1562 
1563 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1564 /// the desired ByVal argument alignment.
1565 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1566  if (MaxAlign == MaxMaxAlign)
1567  return;
1568  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1569  if (MaxMaxAlign >= 32 &&
1570  VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1571  MaxAlign = Align(32);
1572  else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1573  MaxAlign < 16)
1574  MaxAlign = Align(16);
1575  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1576  Align EltAlign;
1577  getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1578  if (EltAlign > MaxAlign)
1579  MaxAlign = EltAlign;
1580  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1581  for (auto *EltTy : STy->elements()) {
1582  Align EltAlign;
1583  getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1584  if (EltAlign > MaxAlign)
1585  MaxAlign = EltAlign;
1586  if (MaxAlign == MaxMaxAlign)
1587  break;
1588  }
1589  }
1590 }
1591 
1592 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1593 /// function arguments in the caller parameter area.
1595  const DataLayout &DL) const {
1596  // 16byte and wider vectors are passed on 16byte boundary.
1597  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1598  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1599  if (Subtarget.hasAltivec())
1600  getMaxByValAlign(Ty, Alignment, Align(16));
1601  return Alignment.value();
1602 }
1603 
1605  return Subtarget.useSoftFloat();
1606 }
1607 
1609  return Subtarget.hasSPE();
1610 }
1611 
1613  return VT.isScalarInteger();
1614 }
1615 
1616 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1617  switch ((PPCISD::NodeType)Opcode) {
1618  case PPCISD::FIRST_NUMBER: break;
1619  case PPCISD::FSEL: return "PPCISD::FSEL";
1620  case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1621  case PPCISD::XSMINC: return "PPCISD::XSMINC";
1622  case PPCISD::FCFID: return "PPCISD::FCFID";
1623  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1624  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1625  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1626  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1627  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1628  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1629  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1631  return "PPCISD::FP_TO_UINT_IN_VSR,";
1633  return "PPCISD::FP_TO_SINT_IN_VSR";
1634  case PPCISD::FRE: return "PPCISD::FRE";
1635  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1636  case PPCISD::FTSQRT:
1637  return "PPCISD::FTSQRT";
1638  case PPCISD::FSQRT:
1639  return "PPCISD::FSQRT";
1640  case PPCISD::STFIWX: return "PPCISD::STFIWX";
1641  case PPCISD::VPERM: return "PPCISD::VPERM";
1642  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1644  return "PPCISD::XXSPLTI_SP_TO_DP";
1645  case PPCISD::XXSPLTI32DX:
1646  return "PPCISD::XXSPLTI32DX";
1647  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1648  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1649  case PPCISD::XXPERM:
1650  return "PPCISD::XXPERM";
1651  case PPCISD::VECSHL: return "PPCISD::VECSHL";
1652  case PPCISD::CMPB: return "PPCISD::CMPB";
1653  case PPCISD::Hi: return "PPCISD::Hi";
1654  case PPCISD::Lo: return "PPCISD::Lo";
1655  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1656  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1657  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1658  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1659  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1660  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1661  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1662  case PPCISD::SRL: return "PPCISD::SRL";
1663  case PPCISD::SRA: return "PPCISD::SRA";
1664  case PPCISD::SHL: return "PPCISD::SHL";
1665  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1666  case PPCISD::CALL: return "PPCISD::CALL";
1667  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1668  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1669  case PPCISD::CALL_RM:
1670  return "PPCISD::CALL_RM";
1671  case PPCISD::CALL_NOP_RM:
1672  return "PPCISD::CALL_NOP_RM";
1673  case PPCISD::CALL_NOTOC_RM:
1674  return "PPCISD::CALL_NOTOC_RM";
1675  case PPCISD::MTCTR: return "PPCISD::MTCTR";
1676  case PPCISD::BCTRL: return "PPCISD::BCTRL";
1677  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1678  case PPCISD::BCTRL_RM:
1679  return "PPCISD::BCTRL_RM";
1681  return "PPCISD::BCTRL_LOAD_TOC_RM";
1682  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1683  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1684  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1685  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1686  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1687  case PPCISD::MFVSR: return "PPCISD::MFVSR";
1688  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1689  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1690  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1691  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1693  return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1695  return "PPCISD::ANDI_rec_1_EQ_BIT";
1697  return "PPCISD::ANDI_rec_1_GT_BIT";
1698  case PPCISD::VCMP: return "PPCISD::VCMP";
1699  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1700  case PPCISD::LBRX: return "PPCISD::LBRX";
1701  case PPCISD::STBRX: return "PPCISD::STBRX";
1702  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1703  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1704  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1705  case PPCISD::STXSIX: return "PPCISD::STXSIX";
1706  case PPCISD::VEXTS: return "PPCISD::VEXTS";
1707  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1708  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1709  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1710  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1712  return "PPCISD::ST_VSR_SCAL_INT";
1713  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1714  case PPCISD::BDNZ: return "PPCISD::BDNZ";
1715  case PPCISD::BDZ: return "PPCISD::BDZ";
1716  case PPCISD::MFFS: return "PPCISD::MFFS";
1717  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1718  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1719  case PPCISD::CR6SET: return "PPCISD::CR6SET";
1720  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1721  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1722  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1723  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1724  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1725  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1726  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1727  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1728  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1729  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1730  case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1731  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1732  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1733  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1734  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1735  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1736  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1737  case PPCISD::PADDI_DTPREL:
1738  return "PPCISD::PADDI_DTPREL";
1739  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1740  case PPCISD::SC: return "PPCISD::SC";
1741  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1742  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1743  case PPCISD::RFEBB: return "PPCISD::RFEBB";
1744  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1745  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1746  case PPCISD::VABSD: return "PPCISD::VABSD";
1747  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1748  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1749  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1750  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1751  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1752  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1753  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1755  return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1757  return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1758  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1759  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1760  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1761  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1762  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1763  case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1764  case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1765  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1767  return "PPCISD::STRICT_FADDRTZ";
1768  case PPCISD::STRICT_FCTIDZ:
1769  return "PPCISD::STRICT_FCTIDZ";
1770  case PPCISD::STRICT_FCTIWZ:
1771  return "PPCISD::STRICT_FCTIWZ";
1773  return "PPCISD::STRICT_FCTIDUZ";
1775  return "PPCISD::STRICT_FCTIWUZ";
1776  case PPCISD::STRICT_FCFID:
1777  return "PPCISD::STRICT_FCFID";
1778  case PPCISD::STRICT_FCFIDU:
1779  return "PPCISD::STRICT_FCFIDU";
1780  case PPCISD::STRICT_FCFIDS:
1781  return "PPCISD::STRICT_FCFIDS";
1783  return "PPCISD::STRICT_FCFIDUS";
1784  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1785  case PPCISD::STORE_COND:
1786  return "PPCISD::STORE_COND";
1787  }
1788  return nullptr;
1789 }
1790 
1792  EVT VT) const {
1793  if (!VT.isVector())
1794  return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1795 
1797 }
1798 
1800  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1801  return true;
1802 }
1803 
1804 //===----------------------------------------------------------------------===//
1805 // Node matching predicates, for use by the tblgen matching code.
1806 //===----------------------------------------------------------------------===//
1807 
1808 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1810  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1811  return CFP->getValueAPF().isZero();
1812  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1813  // Maybe this has already been legalized into the constant pool?
1814  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1815  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1816  return CFP->getValueAPF().isZero();
1817  }
1818  return false;
1819 }
1820 
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef mask elements are encoded as negative indices and match anything.
  if (Op < 0)
    return true;
  return Op == Val;
}
1826 
1827 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1828 /// VPKUHUM instruction.
1829 /// The ShuffleKind distinguishes between big-endian operations with
1830 /// two different inputs (0), either-endian operations with two identical
1831 /// inputs (1), and little-endian operations with two different inputs (2).
1832 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1834  SelectionDAG &DAG) {
1835  bool IsLE = DAG.getDataLayout().isLittleEndian();
1836  if (ShuffleKind == 0) {
1837  if (IsLE)
1838  return false;
1839  for (unsigned i = 0; i != 16; ++i)
1840  if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1841  return false;
1842  } else if (ShuffleKind == 2) {
1843  if (!IsLE)
1844  return false;
1845  for (unsigned i = 0; i != 16; ++i)
1846  if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1847  return false;
1848  } else if (ShuffleKind == 1) {
1849  unsigned j = IsLE ? 0 : 1;
1850  for (unsigned i = 0; i != 8; ++i)
1851  if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1852  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1853  return false;
1854  }
1855  return true;
1856 }
1857 
1858 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1859 /// VPKUWUM instruction.
1860 /// The ShuffleKind distinguishes between big-endian operations with
1861 /// two different inputs (0), either-endian operations with two identical
1862 /// inputs (1), and little-endian operations with two different inputs (2).
1863 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1865  SelectionDAG &DAG) {
1866  bool IsLE = DAG.getDataLayout().isLittleEndian();
1867  if (ShuffleKind == 0) {
1868  if (IsLE)
1869  return false;
1870  for (unsigned i = 0; i != 16; i += 2)
1871  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1872  !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1873  return false;
1874  } else if (ShuffleKind == 2) {
1875  if (!IsLE)
1876  return false;
1877  for (unsigned i = 0; i != 16; i += 2)
1878  if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1879  !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1880  return false;
1881  } else if (ShuffleKind == 1) {
1882  unsigned j = IsLE ? 0 : 2;
1883  for (unsigned i = 0; i != 8; i += 2)
1884  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1885  !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1886  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1887  !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1888  return false;
1889  }
1890  return true;
1891 }
1892 
1893 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1894 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1895 /// current subtarget.
1896 ///
1897 /// The ShuffleKind distinguishes between big-endian operations with
1898 /// two different inputs (0), either-endian operations with two identical
1899 /// inputs (1), and little-endian operations with two different inputs (2).
1900 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1902  SelectionDAG &DAG) {
1903  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1904  if (!Subtarget.hasP8Vector())
1905  return false;
1906 
1907  bool IsLE = DAG.getDataLayout().isLittleEndian();
1908  if (ShuffleKind == 0) {
1909  if (IsLE)
1910  return false;
1911  for (unsigned i = 0; i != 16; i += 4)
1912  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1913  !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1914  !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1915  !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1916  return false;
1917  } else if (ShuffleKind == 2) {
1918  if (!IsLE)
1919  return false;
1920  for (unsigned i = 0; i != 16; i += 4)
1921  if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1922  !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1923  !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1924  !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1925  return false;
1926  } else if (ShuffleKind == 1) {
1927  unsigned j = IsLE ? 0 : 4;
1928  for (unsigned i = 0; i != 8; i += 4)
1929  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1930  !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1931  !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1932  !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1933  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1934  !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1935  !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1936  !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1937  return false;
1938  }
1939  return true;
1940 }
1941 
1942 /// isVMerge - Common function, used to match vmrg* shuffles.
1943 ///
1944 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1945  unsigned LHSStart, unsigned RHSStart) {
1946  if (N->getValueType(0) != MVT::v16i8)
1947  return false;
1948  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1949  "Unsupported merge size!");
1950 
1951  for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1952  for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1953  if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1954  LHSStart+j+i*UnitSize) ||
1955  !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1956  RHSStart+j+i*UnitSize))
1957  return false;
1958  }
1959  return true;
1960 }
1961 
1962 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1963 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1964 /// The ShuffleKind distinguishes between big-endian merges with two
1965 /// different inputs (0), either-endian merges with two identical inputs (1),
1966 /// and little-endian merges with two different inputs (2). For the latter,
1967 /// the input operands are swapped (see PPCInstrAltivec.td).
1969  unsigned ShuffleKind, SelectionDAG &DAG) {
1970  if (DAG.getDataLayout().isLittleEndian()) {
1971  if (ShuffleKind == 1) // unary
1972  return isVMerge(N, UnitSize, 0, 0);
1973  else if (ShuffleKind == 2) // swapped
1974  return isVMerge(N, UnitSize, 0, 16);
1975  else
1976  return false;
1977  } else {
1978  if (ShuffleKind == 1) // unary
1979  return isVMerge(N, UnitSize, 8, 8);
1980  else if (ShuffleKind == 0) // normal
1981  return isVMerge(N, UnitSize, 8, 24);
1982  else
1983  return false;
1984  }
1985 }
1986 
1987 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1988 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1989 /// The ShuffleKind distinguishes between big-endian merges with two
1990 /// different inputs (0), either-endian merges with two identical inputs (1),
1991 /// and little-endian merges with two different inputs (2). For the latter,
1992 /// the input operands are swapped (see PPCInstrAltivec.td).
1994  unsigned ShuffleKind, SelectionDAG &DAG) {
1995  if (DAG.getDataLayout().isLittleEndian()) {
1996  if (ShuffleKind == 1) // unary
1997  return isVMerge(N, UnitSize, 8, 8);
1998  else if (ShuffleKind == 2) // swapped
1999  return isVMerge(N, UnitSize, 8, 24);
2000  else
2001  return false;
2002  } else {
2003  if (ShuffleKind == 1) // unary
2004  return isVMerge(N, UnitSize, 0, 0);
2005  else if (ShuffleKind == 0) // normal
2006  return isVMerge(N, UnitSize, 0, 16);
2007  else
2008  return false;
2009  }
2010 }
2011 
2012 /**
2013  * Common function used to match vmrgew and vmrgow shuffles
2014  *
2015  * The indexOffset determines whether to look for even or odd words in
2016  * the shuffle mask. This is based on the of the endianness of the target
2017  * machine.
2018  * - Little Endian:
2019  * - Use offset of 0 to check for odd elements
2020  * - Use offset of 4 to check for even elements
2021  * - Big Endian:
2022  * - Use offset of 0 to check for even elements
2023  * - Use offset of 4 to check for odd elements
2024  * A detailed description of the vector element ordering for little endian and
2025  * big endian can be found at
2026  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2027  * Targeting your applications - what little endian and big endian IBM XL C/C++
2028  * compiler differences mean to you
2029  *
2030  * The mask to the shuffle vector instruction specifies the indices of the
2031  * elements from the two input vectors to place in the result. The elements are
2032  * numbered in array-access order, starting with the first vector. These vectors
2033  * are always of type v16i8, thus each vector will contain 16 elements of size
2034  * 8. More info on the shuffle vector can be found in the
2035  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2036  * Language Reference.
2037  *
2038  * The RHSStartValue indicates whether the same input vectors are used (unary)
2039  * or two different input vectors are used, based on the following:
2040  * - If the instruction uses the same vector for both inputs, the range of the
2041  * indices will be 0 to 15. In this case, the RHSStart value passed should
2042  * be 0.
2043  * - If the instruction has two different vectors then the range of the
2044  * indices will be 0 to 31. In this case, the RHSStart value passed should
2045  * be 16 (indices 0-15 specify elements in the first vector while indices 16
2046  * to 31 specify elements in the second vector).
2047  *
2048  * \param[in] N The shuffle vector SD Node to analyze
2049  * \param[in] IndexOffset Specifies whether to look for even or odd elements
2050  * \param[in] RHSStartValue Specifies the starting index for the righthand input
2051  * vector to the shuffle_vector instruction
2052  * \return true iff this shuffle vector represents an even or odd word merge
2053  */
2054 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2055  unsigned RHSStartValue) {
2056  if (N->getValueType(0) != MVT::v16i8)
2057  return false;
2058 
2059  for (unsigned i = 0; i < 2; ++i)
2060  for (unsigned j = 0; j < 4; ++j)
2061  if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2062  i*RHSStartValue+j+IndexOffset) ||
2063  !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2064  i*RHSStartValue+j+IndexOffset+8))
2065  return false;
2066  return true;
2067 }
2068 
2069 /**
2070  * Determine if the specified shuffle mask is suitable for the vmrgew or
2071  * vmrgow instructions.
2072  *
2073  * \param[in] N The shuffle vector SD Node to analyze
2074  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2075  * \param[in] ShuffleKind Identify the type of merge:
2076  * - 0 = big-endian merge with two different inputs;
2077  * - 1 = either-endian merge with two identical inputs;
2078  * - 2 = little-endian merge with two different inputs (inputs are swapped for
2079  * little-endian merges).
2080  * \param[in] DAG The current SelectionDAG
2081  * \return true iff this shuffle mask
2082  */
2084  unsigned ShuffleKind, SelectionDAG &DAG) {
2085  if (DAG.getDataLayout().isLittleEndian()) {
2086  unsigned indexOffset = CheckEven ? 4 : 0;
2087  if (ShuffleKind == 1) // Unary
2088  return isVMerge(N, indexOffset, 0);
2089  else if (ShuffleKind == 2) // swapped
2090  return isVMerge(N, indexOffset, 16);
2091  else
2092  return false;
2093  }
2094  else {
2095  unsigned indexOffset = CheckEven ? 0 : 4;
2096  if (ShuffleKind == 1) // Unary
2097  return isVMerge(N, indexOffset, 0);
2098  else if (ShuffleKind == 0) // Normal
2099  return isVMerge(N, indexOffset, 16);
2100  else
2101  return false;
2102  }
2103  return false;
2104 }
2105 
2106 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2107 /// amount, otherwise return -1.
2108 /// The ShuffleKind distinguishes between big-endian operations with two
2109 /// different inputs (0), either-endian operations with two identical inputs
2110 /// (1), and little-endian operations with two different inputs (2). For the
2111 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  // vsldoi only operates on full 16-byte vectors.
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1; // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  // If element i's index is smaller than i, the (undef) elements before it
  // could not belong to the same consecutive run, so this is not a shift.
  if (ShiftAmt < i) return -1;

  // Normalize ShiftAmt to the source index that element 0 of the run implies.
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Two-input case: check the rest of the elements to see if they are
    // consecutive (indices may run into the second input, up to 31).
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Unary case: both inputs are the same vector, so the expected indices
    // wrap around modulo 16.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // On little-endian targets the inputs are swapped (see PPCInstrAltivec.td),
  // so the equivalent shift amount is the complement within 16 bytes.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
2152 
2153 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2154 /// specifies a splat of a single element that is suitable for input to
2155 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2157  EVT VT = N->getValueType(0);
2158  if (VT == MVT::v2i64 || VT == MVT::v2f64)
2159  return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2160 
2161  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2162  EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2163 
2164  // The consecutive indices need to specify an element, not part of two
2165  // different elements. So abandon ship early if this isn't the case.
2166  if (N->getMaskElt(0) % EltSize != 0)
2167  return false;
2168 
2169  // This is a splat operation if each element of the permute is the same, and
2170  // if the value doesn't reference the second vector.
2171  unsigned ElementBase = N->getMaskElt(0);
2172 
2173  // FIXME: Handle UNDEF elements too!
2174  if (ElementBase >= 16)
2175  return false;
2176 
2177  // Check that the indices are consecutive, in the case of a multi-byte element
2178  // splatted with a v16i8 mask.
2179  for (unsigned i = 1; i != EltSize; ++i)
2180  if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2181  return false;
2182 
2183  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2184  if (N->getMaskElt(i) < 0) continue;
2185  for (unsigned j = 0; j != EltSize; ++j)
2186  if (N->getMaskElt(i+j) != N->getMaskElt(j))
2187  return false;
2188  }
2189  return true;
2190 }
2191 
2192 /// Check that the mask is shuffling N byte elements. Within each N byte
2193 /// element of the mask, the indices could be either in increasing or
2194 /// decreasing order as long as they are consecutive.
2195 /// \param[in] N the shuffle vector SD Node to analyze
2196 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2197 /// Word/DoubleWord/QuadWord).
2198 /// \param[in] StepLen the delta indices number among the N byte element, if
2199 /// the mask is in increasing/decreasing order then it is 1/-1.
2200 /// \return true iff the mask is shuffling N byte elements.
2202  int StepLen) {
2203  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2204  "Unexpected element width.");
2205  assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2206 
2207  unsigned NumOfElem = 16 / Width;
2208  unsigned MaskVal[16]; // Width is never greater than 16
2209  for (unsigned i = 0; i < NumOfElem; ++i) {
2210  MaskVal[0] = N->getMaskElt(i * Width);
2211  if ((StepLen == 1) && (MaskVal[0] % Width)) {
2212  return false;
2213  } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2214  return false;
2215  }
2216 
2217  for (unsigned int j = 1; j < Width; ++j) {
2218  MaskVal[j] = N->getMaskElt(i * Width + j);
2219  if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2220  return false;
2221  }
2222  }
2223  }
2224 
2225  return true;
2226 }
2227 
/// isXXINSERTWMask - Return true if this shuffle mask can be implemented by
/// inserting one word of one input into the other (XXINSERTW-style). On
/// success the outputs are set as follows: \p ShiftElts is the word shift to
/// apply to the source vector so the inserted word lines up, \p InsertAtByte
/// is the byte offset of the insertion point, and \p Swap is true when the
/// two shuffle inputs must be exchanged first. \p IsLE selects the
/// endian-dependent encoding of the outputs.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // The mask must move whole consecutive words (4-byte groups).
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, i.e. the word each result word
  // takes: 0-3 name words of the first input, 4-7 words of the second.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  // Per-endianness tables mapping the out-of-place word to the shift amount.
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // Each case accepts a mask that is an identity pattern on one input with
  // exactly one word replaced by a word of the other input.
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    // Only one source word position can be inserted without a shift;
    // which one depends on endianness.
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
2302 
2304  bool &Swap, bool IsLE) {
2305  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2306  // Ensure each byte index of the word is consecutive.
2307  if (!isNByteElemShuffleMask(N, 4, 1))
2308  return false;
2309 
2310  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2311  unsigned M0 = N->getMaskElt(0) / 4;
2312  unsigned M1 = N->getMaskElt(4) / 4;
2313  unsigned M2 = N->getMaskElt(8) / 4;
2314  unsigned M3 = N->getMaskElt(12) / 4;
2315 
2316  // If both vector operands for the shuffle are the same vector, the mask will
2317  // contain only elements from the first one and the second one will be undef.
2318  if (N->getOperand(1).isUndef()) {
2319  assert(M0 < 4 && "Indexing into an undef vector?");
2320  if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2321  return false;
2322 
2323  ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2324  Swap = false;
2325  return true;
2326  }
2327 
2328  // Ensure each word index of the ShuffleVector Mask is consecutive.
2329  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2330  return false;
2331 
2332  if (IsLE) {
2333  if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2334  // Input vectors don't need to be swapped if the leading element
2335  // of the result is one of the 3 left elements of the second vector
2336  // (or if there is no shift to be done at all).
2337  Swap = false;
2338  ShiftElts = (8 - M0) % 8;
2339  } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2340  // Input vectors need to be swapped if the leading element
2341  // of the result is one of the 3 left elements of the first vector
2342  // (or if we're shifting by 4 - thereby simply swapping the vectors).
2343  Swap = true;
2344  ShiftElts = (4 - M0) % 4;
2345  }
2346 
2347  return true;
2348  } else { // BE
2349  if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2350  // Input vectors don't need to be swapped if the leading element
2351  // of the result is one of the 4 elements of the first vector.
2352  Swap = false;
2353  ShiftElts = M0;
2354  } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2355  // Input vectors need to be swapped if the leading element
2356  // of the result is one of the 4 elements of the right vector.
2357  Swap = true;
2358  ShiftElts = M0 - 4;
2359  }
2360 
2361  return true;
2362  }
2363 }
2364 
2366  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2367 
2368  if (!isNByteElemShuffleMask(N, Width, -1))
2369  return false;
2370 
2371  for (int i = 0; i < 16; i += Width)
2372  if (N->getMaskElt(i) != i + Width - 1)
2373  return false;
2374 
2375  return true;
2376 }
2377 
2379  return isXXBRShuffleMaskHelper(N, 2);
2380 }
2381 
2383  return isXXBRShuffleMaskHelper(N, 4);
2384 }
2385 
2387  return isXXBRShuffleMaskHelper(N, 8);
2388 }
2389 
2391  return isXXBRShuffleMaskHelper(N, 16);
2392 }
2393 
2394 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2395 /// if the inputs to the instruction should be swapped and set \p DM to the
2396 /// value for the immediate.
2397 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2398 /// AND element 0 of the result comes from the first input (LE) or second input
2399 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2400 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2401 /// mask.
2403  bool &Swap, bool IsLE) {
2404  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2405 
2406  // Ensure each byte index of the double word is consecutive.
2407  if (!isNByteElemShuffleMask(N, 8, 1))
2408  return false;
2409 
2410  unsigned M0 = N->getMaskElt(0) / 8;
2411  unsigned M1 = N->getMaskElt(8) / 8;
2412  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2413 
2414  // If both vector operands for the shuffle are the same vector, the mask will
2415  // contain only elements from the first one and the second one will be undef.
2416  if (N->getOperand(1).isUndef()) {
2417  if ((M0 | M1) < 2) {
2418  DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2419  Swap = false;
2420  return true;
2421  } else
2422  return false;
2423  }
2424 
2425  if (IsLE) {
2426  if (M0 > 1 && M1 < 2) {
2427  Swap = false;
2428  } else if (M0 < 2 && M1 > 1) {
2429  M0 = (M0 + 2) % 4;
2430  M1 = (M1 + 2) % 4;
2431  Swap = true;
2432  } else
2433  return false;
2434 
2435  // Note: if control flow comes here that means Swap is already set above
2436  DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2437  return true;
2438  } else { // BE
2439  if (M0 < 2 && M1 > 1) {
2440  Swap = false;
2441  } else if (M0 > 1 && M1 < 2) {
2442  M0 = (M0 + 2) % 4;
2443  M1 = (M1 + 2) % 4;
2444  Swap = true;
2445  } else
2446  return false;
2447 
2448  // Note: if control flow comes here that means Swap is already set above
2449  DM = (M0 << 1) + (M1 & 1);
2450  return true;
2451  }
2452 }
2453 
2454 
2455 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2456 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2457 /// elements are counted from the left of the vector register).
2458 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2459  SelectionDAG &DAG) {
2460  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2461  assert(isSplatShuffleMask(SVOp, EltSize));
2462  EVT VT = SVOp->getValueType(0);
2463 
2464  if (VT == MVT::v2i64 || VT == MVT::v2f64)
2465  return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2466  : SVOp->getMaskElt(0);
2467 
2468  if (DAG.getDataLayout().isLittleEndian())
2469  return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2470  else
2471  return SVOp->getMaskElt(0) / EltSize;
2472 }
2473 
2474 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2475 /// by using a vspltis[bhw] instruction of the specified element size, return
2476 /// the constant being splatted. The ByteSize field indicates the number of
2477 /// bytes of each element [124] -> [bhw].
2478 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2479  SDValue OpVal;
2480 
2481  // If ByteSize of the splat is bigger than the element size of the
2482  // build_vector, then we have a case where we are checking for a splat where
2483  // multiple elements of the buildvector are folded together into a single
2484  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2485  unsigned EltSize = 16/N->getNumOperands();
2486  if (EltSize < ByteSize) {
2487  unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2488  SDValue UniquedVals[4];
2489  assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2490 
2491  // See if all of the elements in the buildvector agree across.
2492  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2493  if (N->getOperand(i).isUndef()) continue;
2494  // If the element isn't a constant, bail fully out.
2495  if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2496 
2497  if (!UniquedVals[i&(Multiple-1)].getNode())
2498  UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2499  else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2500  return SDValue(); // no match.
2501  }
2502 
2503  // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2504  // either constant or undef values that are identical for each chunk. See
2505  // if these chunks can form into a larger vspltis*.
2506 
2507  // Check to see if all of the leading entries are either 0 or -1. If
2508  // neither, then this won't fit into the immediate field.
2509  bool LeadingZero = true;
2510  bool LeadingOnes = true;
2511  for (unsigned i = 0; i != Multiple-1; ++i) {
2512  if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2513 
2514  LeadingZero &= isNullConstant(UniquedVals[i]);
2515  LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2516  }
2517  // Finally, check the least significant entry.
2518  if (LeadingZero) {
2519  if (!UniquedVals[Multiple-1].getNode())
2520  return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2521  int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2522  if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2523  return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2524  }
2525  if (LeadingOnes) {
2526  if (!UniquedVals[Multiple-1].getNode())
2527  return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2528  int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2529  if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2530  return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2531  }
2532 
2533  return SDValue();
2534  }
2535 
2536  // Check to see if this buildvec has a single non-undef value in its elements.
2537  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2538  if (N->getOperand(i).isUndef()) continue;
2539  if (!OpVal.getNode())
2540  OpVal = N->getOperand(i);
2541  else if (OpVal != N->getOperand(i))
2542  return SDValue();
2543  }
2544 
2545  if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2546 
2547  unsigned ValSizeInBytes = EltSize;
2548  uint64_t Value = 0;
2549  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2550  Value = CN->getZExtValue();
2551  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2552  assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2553  Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2554  }
2555 
2556  // If the splat value is larger than the element value, then we can never do
2557  // this splat. The only case that we could fit the replicated bits into our
2558  // immediate field for would be zero, and we prefer to use vxor for it.
2559  if (ValSizeInBytes < ByteSize) return SDValue();
2560 
2561  // If the element value is larger than the splat value, check if it consists
2562  // of a repeated bit pattern of size ByteSize.
2563  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2564  return SDValue();
2565 
2566  // Properly sign extend the value.
2567  int MaskVal = SignExtend32(Value, ByteSize * 8);
2568 
2569  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2570  if (MaskVal == 0) return SDValue();
2571 
2572  // Finally, if this value fits in a 5 bit sext field, return it
2573  if (SignExtend32<5>(MaskVal) == MaskVal)
2574  return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2575  return SDValue();
2576 }
2577 
2578 //===----------------------------------------------------------------------===//
2579 // Addressing Mode Selection
2580 //===----------------------------------------------------------------------===//
2581 
2582 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2583 /// or 64-bit immediate, and if the value can be accurately represented as a
2584 /// sign extension from a 16-bit value. If so, this returns true and the
2585 /// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  // Truncate the constant to 16 bits, then verify that widening it back to
  // the node's own width (sign extension happens in the cast) reproduces the
  // original value exactly — i.e. no information was lost.
  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
/// Convenience overload taking an SDValue; forwards to the SDNode form above.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
2599 
2600 /// Used when computing address flags for selecting loads and stores.
2601 /// If we have an OR, check if the LHS and RHS are provably disjoint.
2602 /// An OR of two provably disjoint values is equivalent to an ADD.
2603 /// Most PPC load/store instructions compute the effective address as a sum,
2604 /// so doing this conversion is useful.
2605 static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2606  if (N.getOpcode() != ISD::OR)
2607  return false;
2608  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2609  if (!LHSKnown.Zero.getBoolValue())
2610  return false;
2611  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2612  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2613 }
2614 
2615 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2616 /// be represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  // SPE f64 loads/stores can only encode small displacements (see the caller's
  // note about 8-bit offsets), so force the [r+r] form whenever any memory
  // user of this address performs an f64 access.
  for (SDNode *U : N->uses()) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
      if (Memop->getMemoryVT() == MVT::f64) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }
  return false;
}
2631 
2632 /// isIntS34Immediate - This method tests if value of node given can be
2633 /// accurately represented as a sign extension from a 34-bit value. If so,
2634 /// this returns true and the immediate.
bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  // Extract the full 64-bit value and check it fits in a signed 34-bit field
  // (the displacement width of prefixed load/store instructions).
  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
  return isInt<34>(Imm);
}
/// Convenience overload taking an SDValue; forwards to the SDNode form above.
bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
  return isIntS34Immediate(Op.getNode(), Imm);
}
2645 
2646 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2647 /// can be represented as an indexed [r+r] operation. Returns false if it
2648 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2649 /// non-zero and N can be represented by a base register plus a signed 16-bit
2650 /// displacement, make a more precise judgement by checking (displacement % \p
2651 /// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(
    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  int16_t Imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
      return true;
    // A foldable, suitably-aligned signed 16-bit displacement is better
    // expressed as [r+i]; report failure so the caller uses that form.
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false; // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
2699 
2700 // If we happen to be doing an i64 load or store into a stack slot that has
2701 // less than a 4-byte alignment, then the frame-index elimination may need to
2702 // use an indexed load or store instruction (because the offset may not be a
2703 // multiple of 4). The extra register needed to hold the offset comes from the
2704 // register scavenger, and it is possible that the scavenger will need to use
2705 // an emergency spill slot. As a result, we need to make sure that a spill slot
2706 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2707 // stack slot.
2708 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2709  // FIXME: This does not handle the LWA case.
2710  if (VT != MVT::i64)
2711  return;
2712 
2713  // NOTE: We'll exclude negative FIs here, which come from argument
2714  // lowering, because there are no known test cases triggering this problem
2715  // using packed structures (or similar). We can remove this exclusion if
2716  // we find such a test case. The reason why this is so test-case driven is
2717  // because this entire 'fixup' is only to prevent crashes (from the
2718  // register scavenger) on not-really-valid inputs. For example, if we have:
2719  // %a = alloca i1
2720  // %b = bitcast i1* %a to i64*
2721  // store i64* a, i64 b
2722  // then the store should really be marked as 'align 1', but is not. If it
2723  // were marked as 'align 1' then the indexed form would have been
2724  // instruction-selected initially, and the problem this 'fixup' is preventing
2725  // won't happen regardless.
2726  if (FrameIdx < 0)
2727  return;
2728 
2729  MachineFunction &MF = DAG.getMachineFunction();
2730  MachineFrameInfo &MFI = MF.getFrameInfo();
2731 
2732  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2733  return;
2734 
2735  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2736  FuncInfo->setHasNonRISpills();
2737 }
2738 
2739 /// Returns true if the address N can be represented by a base register plus
2740 /// a signed 16-bit displacement [r+imm], and if it is not better
2741 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2742 /// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        // An i64 access off an under-aligned frame index may later need an
        // emergency spill slot; see fixupFuncForFI.
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      // High half, adjusted for the sign of the low 16 bits.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: use the whole address as the base with a zero displacement.
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}
2845 
2846 /// Similar to the 16-bit case but for instructions that take a 34-bit
2847 /// displacement field (prefixed loads/stores).
bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
                                              SDValue &Base,
                                              SelectionDAG &DAG) const {
  // Only on 64-bit targets.
  if (N.getValueType() != MVT::i64)
    return false;

  SDLoc dl(N);
  int64_t Imm = 0;

  if (N.getOpcode() == ISD::ADD) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    return true;
  }

  if (N.getOpcode() == ISD::OR) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
      return false;
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    return true;
  }

  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    return true;
  }

  return false;
}
2894 
2895 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2896 /// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}
2926 
2927 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2928  Ty *PCRelCand = dyn_cast<Ty>(N);
2929  return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2930 }
2931 
2932 /// Returns true if this address is a PC Relative address.
2933 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2934 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
  // This is a materialize PC Relative node. Always select this as PC Relative.
  // Note Base is assigned unconditionally; it is only meaningful to callers
  // when this function returns true.
  Base = N;
  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
    return true;
  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
      isValidPCRelNode<GlobalAddressSDNode>(N) ||
      isValidPCRelNode<JumpTableSDNode>(N) ||
      isValidPCRelNode<BlockAddressSDNode>(N))
    return true;
  return false;
}
2947 
2948 /// Returns true if we should use a direct load into vector instruction
2949 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2951 
2952  // If there are any other uses other than scalar to vector, then we should
2953  // keep it as a scalar load -> direct move pattern to prevent multiple
2954  // loads.
2955  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2956  if (!LD)
2957  return false;
2958 
2959  EVT MemVT = LD->getMemoryVT();
2960  if (!MemVT.isSimple())
2961  return false;
2962  switch(MemVT.getSimpleVT().SimpleTy) {
2963  case MVT::i64:
2964  break;
2965  case MVT::i32:
2966  if (!ST.hasP8Vector())
2967  return false;
2968  break;
2969  case MVT::i16:
2970  case MVT::i8:
2971  if (!ST.hasP9Vector())
2972  return false;
2973  break;
2974  default:
2975  return false;
2976  }
2977 
2978  SDValue LoadedVal(N, 0);
2979  if (!LoadedVal.hasOneUse())
2980  return false;
2981 
2982  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2983  UI != UE; ++UI)
2984  if (UI.getUse().get().getResNo() == 0 &&
2985  UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2986  UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2987  return false;
2988 
2989  return true;
2990 }
2991 
2992 /// getPreIndexedAddressParts - returns true by value, base pointer and
2993 /// offset pointer and addressing mode by reference if the node's address
2994 /// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  Align Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < Align(4))
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
3075 
3076 //===----------------------------------------------------------------------===//
3077 // LowerOperation implementation
3078 //===----------------------------------------------------------------------===//
3079 
3080 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
3081 /// and LoOpFlags to the target MO flags.
3082 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3083  unsigned &HiOpFlags, unsigned &LoOpFlags,
3084  const GlobalValue *GV = nullptr) {
3085  HiOpFlags = PPCII::MO_HA;
3086  LoOpFlags = PPCII::MO_LO;
3087 
3088  // Don't use the pic base if not in PIC relocation model.
3089  if (IsPIC) {
3090  HiOpFlags |= PPCII::MO_PIC_FLAG;
3091  LoOpFlags |= PPCII::MO_PIC_FLAG;
3092  }
3093 }
3094 
3095 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3096  SelectionDAG &DAG) {
3097  SDLoc DL(HiPart);
3098  EVT PtrVT = HiPart.getValueType();
3099  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3100 
3101  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3102  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3103 
3104  // With PIC, the first instruction is actually "GR+hi(&G)".
3105  if (isPIC)
3106  Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3107  DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3108 
3109  // Generate non-pic code that has direct accesses to the constant pool.
3110  // The address of the global is just (hi(&g)+lo(&g)).
3111  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3112 }
3113 
/// Record on the function's PPC-specific info that it uses the TOC base
/// pointer register.
static void setUsesTOCBasePtr(MachineFunction &MF) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}
3118 
/// Convenience overload: forwards to the MachineFunction form.
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}
3122 
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue GA) const {
  const bool Is64Bit = Subtarget.isPPC64();
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  // The TOC base register: X2 on 64-bit targets, R2 on 32-bit AIX; 32-bit ELF
  // instead goes through the global base register.
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
                        : Subtarget.isAIXABI()
                              ? DAG.getRegister(PPC::R2, VT)
                              : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
  SDValue Ops[] = { GA, Reg };
  // TOC_ENTRY is modeled as a load from the GOT.
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
      MachineMemOperand::MOLoad);
}
3137 
3138 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3139  SelectionDAG &DAG) const {
3140  EVT PtrVT = Op.getValueType();
3141  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3142  const Constant *C = CP->getConstVal();
3143 
3144  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3145  // The actual address of the GlobalValue is stored in the TOC.
3146  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3147  if (Subtarget.isUsingPCRelativeCalls()) {
3148  SDLoc DL(CP);
3149  EVT Ty = getPointerTy(DAG.getDataLayout());
3150  SDValue ConstPool = DAG.getTargetConstantPool(
3151  C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3152  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3153  }
3154  setUsesTOCBasePtr(DAG);
3155  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3156  return getTOCEntry(DAG, SDLoc(CP), GA);
3157  }
3158 
3159  unsigned MOHiFlag, MOLoFlag;
3160  bool IsPIC = isPositionIndependent();
3161  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3162 
3163  if (IsPIC && Subtarget.isSVR4ABI()) {
3164  SDValue GA =
3165  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3166  return getTOCEntry(DAG, SDLoc(CP), GA);
3167  }
3168 
3169  SDValue CPIHi =
3170  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3171  SDValue CPILo =
3172  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3173  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3174 }
3175 
// For 64-bit PowerPC, prefer the more compact relative encodings.
// This trades 32 bits per jump table entry for one or two instructions
// on the jump site.
unsigned PPCTargetLowering::getJumpTableEncoding() const {
  if (isJumpTableRelative())
    return MachineJumpTableInfo::EK_LabelDifference32;

  return TargetLowering::getJumpTableEncoding();
}
3185 
bool PPCTargetLowering::isJumpTableRelative() const {
  // Honor the command-line override requesting absolute jump tables.
  if (UseAbsoluteJumpTables)
    return false;
  // Relative entries are preferred on 64-bit targets and on AIX.
  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
    return true;
  return TargetLowering::isJumpTableRelative();
}
3193 
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  // Only 64-bit ELF gets the custom relocation base below.
  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
  default:
    // Large code models use the global base register as the reloc base.
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  }
}
3208 
const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const {
  // Mirrors getPICJumpTableRelocBase: only 64-bit ELF, non-small/medium code
  // models use the PIC base symbol as the relocation base.
  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
  default:
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }
}
3224 
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA =
        DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit SVR4 PIC accesses the jump table through the TOC.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(GA), GA);
  }

  // Otherwise compose the address from its high and low halves.
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
3261 
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
                                           PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, SDLoc(BASDN),
        DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));

  // Otherwise compose the address from its high and low halves.
  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
3299 
3300 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3301  SelectionDAG &DAG) const {
3302  if (Subtarget.isAIXABI())
3303  return LowerGlobalTLSAddressAIX(Op, DAG);
3304 
3305  return LowerGlobalTLSAddressLinux(Op, DAG);
3306 }
3307 
3308 SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3309  SelectionDAG &DAG) const {
3310  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3311 
3312  if (DAG.getTarget().useEmulatedTLS())
3313  report_fatal_error("Emulated TLS is not yet supported on AIX");
3314 
3315  SDLoc dl(GA);
3316  const GlobalValue *GV = GA->getGlobal();
3317  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3318 
3319  // The general-dynamic model is the only access model supported for now, so
3320  // all the GlobalTLSAddress nodes are lowered with this model.
3321  // We need to generate two TOC entries, one for the variable offset, one for
3322  // the region handle. The global address for the TOC entry of the region
3323  // handle is created with the MO_TLSGDM_FLAG flag and the global address
3324  // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3325  SDValue VariableOffsetTGA =
3326  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3327  SDValue RegionHandleTGA =
3328  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3329  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3330  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3331  return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3332  RegionHandle);
3333 }
3334 
3335 SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3336  SelectionDAG &DAG) const {
3337  // FIXME: TLS addresses currently use medium model code sequences,
3338  // which is the most useful form. Eventually support for small and
3339  // large models could be added if users need it, at the cost of
3340  // additional complexity.
3341  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3342  if (DAG.getTarget().useEmulatedTLS())
3343  return LowerToTLSEmulatedModel(GA, DAG);
3344 
3345  SDLoc dl(GA);
3346  const GlobalValue *GV = GA->getGlobal();
3347  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3348  bool is64bit = Subtarget.isPPC64();
3349  const Module *M = DAG.getMachineFunction().getFunction().getParent();
3350  PICLevel::Level picLevel = M->getPICLevel();
3351 
3352  const TargetMachine &TM = getTargetMachine();
3353  TLSModel::Model Model = TM.getTLSModel(GV);
3354 
3355  if (Model == TLSModel::LocalExec) {
3356  if (Subtarget.isUsingPCRelativeCalls()) {
3357  SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3358  SDValue TGA = DAG.getTargetGlobalAddress(
3359  GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3360  SDValue MatAddr =
3361  DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3362  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3363  }
3364 
3365  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3367  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3369  SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3370  : DAG.getRegister(PPC::R2, MVT::i32);
3371 
3372  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3373  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3374  }
3375 
3376  if (Model == TLSModel::InitialExec) {
3377  bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3378  SDValue TGA = DAG.getTargetGlobalAddress(
3379  GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3380  SDValue TGATLS = DAG.getTargetGlobalAddress(
3381  GV, dl, PtrVT, 0,
3383  SDValue TPOffset;
3384  if (IsPCRel) {
3385  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3386  TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3387  MachinePointerInfo());
3388  } else {
3389  SDValue GOTPtr;
3390  if (is64bit) {
3391  setUsesTOCBasePtr(DAG);
3392  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3393  GOTPtr =
3394  DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3395  } else {
3396  if (!TM.isPositionIndependent())
3397  GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3398  else if (picLevel == PICLevel::SmallPIC)
3399  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3400  else
3401  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3402  }
3403  TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3404  }
3405  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3406  }
3407 
3409  if (Subtarget.isUsingPCRelativeCalls()) {
3410  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3412  return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3413  }
3414 
3415  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3416  SDValue GOTPtr;
3417  if (is64bit) {
3418  setUsesTOCBasePtr(DAG);
3419  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3420  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3421  GOTReg, TGA);
3422  } else {
3423  if (picLevel == PICLevel::SmallPIC)
3424  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3425  else
3426  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3427  }
3428  return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3429  GOTPtr, TGA, TGA);
3430  }
3431 
3432  if (Model == TLSModel::LocalDynamic) {
3433  if (Subtarget.isUsingPCRelativeCalls()) {
3434  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3436  SDValue MatPCRel =
3437  DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3438  return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3439  }
3440 
3441  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3442  SDValue GOTPtr;
3443  if (is64bit) {
3444  setUsesTOCBasePtr(DAG);
3445  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3446  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3447  GOTReg, TGA);
3448  } else {
3449  if (picLevel == PICLevel::SmallPIC)
3450  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3451  else
3452  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3453  }
3454  SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3455  PtrVT, GOTPtr, TGA, TGA);
3456  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3457  PtrVT, TLSAddr, TGA);
3458  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3459  }
3460 
3461  llvm_unreachable("Unknown TLS model!");
3462 }
3463 
3464 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3465  SelectionDAG &DAG) const {
3466  EVT PtrVT = Op.getValueType();
3467  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3468  SDLoc DL(GSDN);
3469  const GlobalValue *GV = GSDN->getGlobal();
3470 
3471  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3472  // The actual address of the GlobalValue is stored in the TOC.
3473  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3474  if (Subtarget.isUsingPCRelativeCalls()) {
3475  EVT Ty = getPointerTy(DAG.getDataLayout());
3476  if (isAccessedAsGotIndirect(Op)) {
3477  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3480  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3481  SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3482  MachinePointerInfo());
3483  return Load;
3484  } else {
3485  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3487  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3488  }
3489  }
3490  setUsesTOCBasePtr(DAG);
3491  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3492  return getTOCEntry(DAG, DL, GA);
3493  }
3494 
3495  unsigned MOHiFlag, MOLoFlag;
3496  bool IsPIC = isPositionIndependent();
3497  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3498 
3499  if (IsPIC && Subtarget.isSVR4ABI()) {
3500  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3501  GSDN->getOffset(),
3503  return getTOCEntry(DAG, DL, GA);
3504  }
3505 
3506  SDValue GAHi =
3507  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3508  SDValue GALo =
3509  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3510 
3511  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3512 }
3513 
/// Custom lower SETCC / STRICT_FSETCC(S): soften f128 compares into
/// libcalls, expand v2i64 equality via v4i32 Altivec compares, and rewrite
/// integer equality compares into xor + compare-against-zero.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  // Strict FP compares carry a chain in operand 0, shifting the remaining
  // operands (LHS, RHS, condition code) up by one.
  bool IsStrict = Op->isStrictFPOpcode();
  ISD::CondCode CC =
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
  SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
  SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  EVT LHSVT = LHS.getValueType();
  SDLoc dl(Op);

  // Soften the setcc with libcall if it is fp128.
  if (LHSVT == MVT::f128) {
    assert(!Subtarget.hasP9Vector() &&
           "SETCC for f128 is already legal under Power9!");
    softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
                        Op->getOpcode() == ISD::STRICT_FSETCCS);
    // If softening still left two operands, rebuild the SETCC node on the
    // softened values; otherwise LHS already holds the final result.
    if (RHS.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
                        DAG.getCondCode(CC));
    if (IsStrict)
      return DAG.getMergeValues({LHS, Chain}, dl);
    return LHS;
  }

  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (LHS.getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC != ISD::SETEQ && CC != ISD::SETNE)
        return SDValue();
      SDValue SetCC32 = DAG.getSetCC(
          dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
          DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
      // Swap the 32-bit words within each doubleword and combine with the
      // unswapped result (AND for EQ, OR for NE) so each doubleword lane
      // reflects both of its word compares.
      int ShuffV[] = {1, 0, 3, 2};
      SDValue Shuff =
          DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
      return DAG.getBitcast(MVT::v2i64,
                            DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
                                        dl, MVT::v4i32, Shuff, SetCC32));
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized. FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnes() || C->isZero())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit. The
  // normal approach here uses sub to do this instead of xor. Using xor exposes
  // the result to other bit-twiddling opportunities.
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}
3589 
3590 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3591  SDNode *Node = Op.getNode();
3592  EVT VT = Node->getValueType(0);
3593  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3594  SDValue InChain = Node->getOperand(0);
3595  SDValue VAListPtr = Node->getOperand(1);
3596  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3597  SDLoc dl(Node);
3598 
3599  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3600 
3601  // gpr_index
3602  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3603  VAListPtr, MachinePointerInfo(SV), MVT::i8);
3604  InChain = GprIndex.getValue(1);
3605 
3606  if (VT == MVT::i64) {
3607  // Check if GprIndex is even
3608  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3609  DAG.getConstant(1, dl, MVT::i32));
3610  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3611  DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3612  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3613  DAG.getConstant(1, dl, MVT::i32));
3614  // Align GprIndex to be even if it isn't
3615  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3616  GprIndex);
3617  }
3618 
3619  // fpr index is 1 byte after gpr
3620  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3621  DAG.getConstant(1, dl, MVT::i32));
3622 
3623  // fpr
3624  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3625  FprPtr, MachinePointerInfo(SV), MVT::i8);
3626  InChain = FprIndex.getValue(1);
3627 
3628  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3629  DAG.getConstant(8, dl, MVT::i32));
3630 
3631  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3632  DAG.getConstant(4, dl, MVT::i32));
3633 
3634  // areas
3635  SDValue OverflowArea =
3636  DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3637  InChain = OverflowArea.getValue(1);
3638 
3639  SDValue RegSaveArea =
3640  DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3641  InChain = RegSaveArea.getValue(1);
3642 
3643  // select overflow_area if index > 8
3644  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3645  DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3646 
3647  // adjustment constant gpr_index * 4/8
3648  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3649  VT.isInteger() ? GprIndex : FprIndex,
3650  DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3651  MVT::i32));
3652 
3653  // OurReg = RegSaveArea + RegConstant
3654  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3655  RegConstant);
3656 
3657  // Floating types are 32 bytes into RegSaveArea
3658  if (VT.isFloatingPoint())
3659  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3660  DAG.getConstant(32, dl, MVT::i32));
3661 
3662  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3663  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3664  VT.isInteger() ? GprIndex : FprIndex,
3665  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3666  MVT::i32));
3667 
3668  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3669  VT.isInteger() ? VAListPtr : FprPtr,
3671 
3672  // determine if we should load from reg_save_area or overflow_area
3673  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3674 
3675  // increase overflow_area by 4/8 if gpr/fpr > 8
3676  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3677  DAG.getConstant(VT.isInteger() ? 4 : 8,
3678  dl, MVT::i32));
3679 
3680  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3681  OverflowAreaPlusN);
3682 
3683  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3685 
3686  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3687 }
3688 
3689 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3690  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3691 
3692  // We have to copy the entire va_list struct:
3693  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3694  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3695  DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3696  false, true, false, MachinePointerInfo(),
3697  MachinePointerInfo());
3698 }
3699 
3700 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3701  SelectionDAG &DAG) const {
3702  if (Subtarget.isAIXABI())
3703  report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3704 
3705  return Op.getOperand(0);
3706 }
3707 
3708 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3709  MachineFunction &MF = DAG.getMachineFunction();
3710  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3711 
3712  assert((Op.getOpcode() == ISD::INLINEASM ||
3713  Op.getOpcode() == ISD::INLINEASM_BR) &&
3714  "Expecting Inline ASM node.");
3715 
3716  // If an LR store is already known to be required then there is not point in
3717  // checking this ASM as well.
3718  if (MFI.isLRStoreRequired())
3719  return Op;
3720 
3721  // Inline ASM nodes have an optional last operand that is an incoming Flag of
3722  // type MVT::Glue. We want to ignore this last operand if that is the case.
3723  unsigned NumOps = Op.getNumOperands();
3724  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3725  --NumOps;
3726 
3727  // Check all operands that may contain the LR.
3728  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3729  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3730  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3731  ++i; // Skip the ID value.
3732 
3733  switch (InlineAsm::getKind(Flags)) {
3734  default:
3735  llvm_unreachable("Bad flags!");
3737  case InlineAsm::Kind_Imm:
3738  case InlineAsm::Kind_Mem:
3739  i += NumVals;
3740  break;
3744  for (; NumVals; --NumVals, ++i) {
3745  Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3746  if (Reg != PPC::LR && Reg != PPC::LR8)
3747  continue;
3748  MFI.setLRStoreRequired();
3749  return Op;
3750  }
3751  break;
3752  }
3753  }
3754  }
3755 
3756  return Op;
3757 }
3758 
3759 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3760  SelectionDAG &DAG) const {
3761  if (Subtarget.isAIXABI())
3762  report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3763 
3764  SDValue Chain = Op.getOperand(0);
3765  SDValue Trmp = Op.getOperand(1); // trampoline
3766  SDValue FPtr = Op.getOperand(2); // nested function
3767  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3768  SDLoc dl(Op);
3769 
3770  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3771  bool isPPC64 = (PtrVT == MVT::i64);
3772  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3773 
3775  TargetLowering::ArgListEntry Entry;
3776 
3777  Entry.Ty = IntPtrTy;
3778  Entry.Node = Trmp; Args.push_back(Entry);
3779 
3780  // TrampSize == (isPPC64 ? 48 : 40);
3781  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3782  isPPC64 ? MVT::i64 : MVT::i32);
3783  Args.push_back(Entry);
3784 
3785  Entry.Node = FPtr; Args.push_back(Entry);
3786  Entry.Node = Nest; Args.push_back(Entry);
3787 
3788  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3790  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3792  DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3793 
3794  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3795  return CallResult.second;
3796 }
3797 
3798 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3799  MachineFunction &MF = DAG.getMachineFunction();
3800  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3801  EVT PtrVT = getPointerTy(MF.getDataLayout());
3802 
3803  SDLoc dl(Op);
3804 
3805  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3806  // vastart just stores the address of the VarArgsFrameIndex slot into the
3807  // memory location argument.
3808  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3809  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3810  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3811  MachinePointerInfo(SV));
3812  }
3813 
3814  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3815  // We suppose the given va_list is already allocated.
3816  //
3817  // typedef struct {
3818  // char gpr; /* index into the array of 8 GPRs
3819  // * stored in the register save area
3820  // * gpr=0 corresponds to r3,
3821  // * gpr=1 to r4, etc.
3822  // */
3823  // char fpr; /* index into the array of 8 FPRs
3824  // * stored in the register save area
3825  // * fpr=0 corresponds to f1,
3826  // * fpr=1 to f2, etc.
3827  // */
3828  // char *overflow_arg_area;
3829  // /* location on stack that holds
3830  // * the next overflow argument
3831  // */
3832  // char *reg_save_area;
3833  // /* where r3:r10 and f1:f8 (if saved)
3834  // * are stored
3835  // */
3836  // } va_list[1];
3837 
3838  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3839  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3840  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3841  PtrVT);
3842  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3843  PtrVT);
3844 
3845  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3846  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3847 
3848  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3849  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3850 
3851  uint64_t FPROffset = 1;
3852  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3853 
3854  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3855 
3856  // Store first byte : number of int regs
3857  SDValue firstStore =
3858  DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3860  uint64_t nextOffset = FPROffset;
3861  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3862  ConstFPROffset);
3863 
3864  // Store second byte : number of float regs
3865  SDValue secondStore =
3866  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3867  MachinePointerInfo(SV, nextOffset), MVT::i8);
3868  nextOffset += StackOffset;
3869  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3870 
3871  // Store second word : arguments given on stack
3872  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3873  MachinePointerInfo(SV, nextOffset));
3874  nextOffset += FrameOffset;
3875  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3876 
3877  // Store third word : arguments given in registers
3878  return DAG.getStore(thirdStore, dl, FR, nextPtr,
3879  MachinePointerInfo(SV, nextOffset));
3880 }
3881 
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX. Thirteen registers (F1-F13) are available for
/// floating-point argument passing under those ABIs.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
3887 
3888 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3889 /// the stack.
3890 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3891  unsigned PtrByteSize) {
3892  unsigned ArgSize = ArgVT.getStoreSize();
3893  if (Flags.isByVal())
3894  ArgSize = Flags.getByValSize();
3895 
3896  // Round up to multiples of the pointer size, except for array members,
3897  // which are always packed.
3898  if (!Flags.isInConsecutiveRegs())
3899  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3900 
3901  return ArgSize;
3902 }
3903 
3904 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3905 /// on the stack.
3907  ISD::ArgFlagsTy Flags,
3908  unsigned PtrByteSize) {
3909  Align Alignment(PtrByteSize);
3910 
3911  // Altivec parameters are padded to a 16 byte boundary.
3912  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3913  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3914  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3915  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3916  Alignment = Align(16);
3917 
3918  // ByVal parameters are aligned as requested.
3919  if (Flags.isByVal()) {
3920  auto BVAlign = Flags.getNonZeroByValAlign();
3921  if (BVAlign > PtrByteSize) {
3922  if (BVAlign.value() % PtrByteSize != 0)
3924  "ByVal alignment is not a multiple of the pointer size");
3925 
3926  Alignment = BVAlign;
3927  }
3928  }
3929 
3930  // Array members are always packed to their original alignment.
3931  if (Flags.isInConsecutiveRegs()) {
3932  // If the array member was split into multiple registers, the first
3933  // needs to be aligned to the size of the full type. (Except for
3934  // ppcf128, which is only aligned as its f64 components.)
3935  if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3936  Alignment = Align(OrigVT.getStoreSize());
3937  else
3938  Alignment = Align(ArgVT.getStoreSize());
3939  }
3940 
3941  return Alignment;
3942 }
3943 
3944 /// CalculateStackSlotUsed - Return whether this argument will use its
3945 /// stack slot (instead of being passed in registers). ArgOffset,
3946 /// AvailableFPRs, and AvailableVRs must hold the current argument
3947 /// position, and will be updated to account for this argument.
3948 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3949  unsigned PtrByteSize, unsigned LinkageSize,
3950  unsigned ParamAreaSize, unsigned &ArgOffset,
3951  unsigned &AvailableFPRs,
3952  unsigned &AvailableVRs) {
3953  bool UseMemory = false;
3954 
3955  // Respect alignment of argument on the stack.
3956  Align Alignment =
3957  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3958  ArgOffset = alignTo(ArgOffset, Alignment);
3959  // If there's no space left in the argument save area, we must
3960  // use memory (this check also catches zero-sized arguments).
3961  if (ArgOffset >= LinkageSize + ParamAreaSize)
3962  UseMemory = true;
3963 
3964  // Allocate argument on the stack.
3965  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3966  if (Flags.isInConsecutiveRegsLast())
3967  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3968  // If we overran the argument save area, we must use memory
3969  // (this check catches arguments passed partially in memory)
3970  if (ArgOffset > LinkageSize + ParamAreaSize)
3971  UseMemory = true;
3972 
3973  // However, if the argument is actually passed in an FPR or a VR,
3974  // we don't use memory after all.
3975  if (!Flags.isByVal()) {
3976  if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3977  if (AvailableFPRs > 0) {
3978  --AvailableFPRs;
3979  return false;
3980  }
3981  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3982  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3983  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3984  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3985  if (AvailableVRs > 0) {
3986  --AvailableVRs;
3987  return false;
3988  }
3989  }
3990 
3991  return UseMemory;
3992 }
3993 
3994 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3995 /// ensure minimum alignment required for target.
3997  unsigned NumBytes) {
3998  return alignTo(NumBytes, Lowering->getStackAlign());
3999 }
4000 
4001 SDValue PPCTargetLowering::LowerFormalArguments(
4002  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4003  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4004  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4005  if (Subtarget.isAIXABI())
4006  return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4007  InVals);
4008  if (Subtarget.is64BitELFABI())
4009  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4010  InVals);
4011  assert(Subtarget.is32BitELFABI());
4012  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4013  InVals);
4014 }
4015 
4016 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4017  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4018  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4019  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4020 
4021  // 32-bit SVR4 ABI Stack Frame Layout:
4022  // +-----------------------------------+
4023  // +--> | Back chain |
4024  // | +-----------------------------------+
4025  // | | Floating-point register save area |
4026  // | +-----------------------------------+
4027  // | | General register save area |
4028  // | +-----------------------------------+
4029  // | | CR save word |
4030  // | +-----------------------------------+
4031  // | | VRSAVE save word |
4032  // | +-----------------------------------+
4033  // | | Alignment padding |
4034  // | +-----------------------------------+
4035  // | | Vector register save area |
4036  // | +-----------------------------------+
4037  // | | Local variable space |
4038  // | +-----------------------------------+
4039  // | | Parameter list area |
4040  // | +-----------------------------------+
4041  // | | LR save word |
4042  // | +-----------------------------------+
4043  // SP--> +--- | Back chain |
4044  // +-----------------------------------+
4045  //
4046  // Specifications:
4047  // System V Application Binary Interface PowerPC Processor Supplement
4048  // AltiVec Technology Programming Interface Manual
4049 
4050  MachineFunction &MF = DAG.getMachineFunction();
4051  MachineFrameInfo &MFI = MF.getFrameInfo();
4052  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4053 
4054  EVT PtrVT = getPointerTy(MF.getDataLayout());
4055  // Potential tail calls could cause overwriting of argument stack slots.
4056  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4057  (CallConv == CallingConv::Fast));
4058  const Align PtrAlign(4);
4059 
4060  // Assign locations to all of the incoming arguments.
4062  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4063  *DAG.getContext());
4064 
4065  // Reserve space for the linkage area on the stack.
4066  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4067  CCInfo.AllocateStack(LinkageSize, PtrAlign);
4068  if (useSoftFloat())
4069  CCInfo.PreAnalyzeFormalArguments(Ins);
4070 
4071  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4072  CCInfo.clearWasPPCF128();
4073 
4074  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4075  CCValAssign &VA = ArgLocs[i];
4076 
4077  // Arguments stored in registers.
4078  if (VA.isRegLoc()) {
4079  const TargetRegisterClass *RC;
4080  EVT ValVT = VA.getValVT();
4081 
4082  switch (ValVT.getSimpleVT().SimpleTy) {
4083  default:
4084  llvm_unreachable("ValVT not supported by formal arguments Lowering");
4085  case MVT::i1:
4086  case MVT::i32:
4087  RC = &PPC::GPRCRegClass;
4088  break;
4089  case MVT::f32:
4090  if (Subtarget.hasP8Vector())
4091  RC = &PPC::VSSRCRegClass;
4092  else if (Subtarget.hasSPE())
4093  RC = &PPC::GPRCRegClass;
4094  else
4095  RC = &PPC::F4RCRegClass;
4096  break;
4097  case MVT::f64:
4098  if (Subtarget.hasVSX())
4099  RC = &PPC::VSFRCRegClass;
4100  else if (Subtarget.hasSPE())
4101  // SPE passes doubles in GPR pairs.
4102  RC = &PPC::GPRCRegClass;
4103  else
4104  RC = &PPC::F8RCRegClass;
4105  break;
4106  case MVT::v16i8:
4107  case MVT::v8i16:
4108  case MVT::v4i32:
4109  RC = &PPC::VRRCRegClass;
4110  break;
4111  case MVT::v4f32:
4112  RC = &PPC::VRRCRegClass;
4113  break;
4114  case MVT::v2f64:
4115  case MVT::v2i64:
4116  RC = &PPC::VRRCRegClass;
4117  break;
4118  }
4119 
4120  SDValue ArgValue;
4121  // Transform the arguments stored in physical registers into
4122  // virtual ones.
4123  if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4124  assert(i + 1 < e && "No second half of double precision argument");
4125  Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4126  Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4127  SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4128  SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4129  if (!Subtarget.isLittleEndian())
4130  std::swap (ArgValueLo, ArgValueHi);
4131  ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4132  ArgValueHi);
4133  } else {
4134  Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4135  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4136  ValVT == MVT::i1 ? MVT::i32 : ValVT);
4137  if (ValVT == MVT::i1)
4138  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4139  }
4140 
4141  InVals.push_back(ArgValue);
4142  } else {
4143  // Argument stored in memory.
4144  assert(VA.isMemLoc());
4145 
4146  // Get the extended size of the argument type in stack
4147  unsigned ArgSize = VA.getLocVT().getStoreSize();
4148  // Get the actual size of the argument type
4149  unsigned ObjSize = VA.getValVT().getStoreSize();
4150  unsigned ArgOffset = VA.getLocMemOffset();
4151  // Stack objects in PPC32 are right justified.
4152  ArgOffset += ArgSize - ObjSize;
4153  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4154 
4155  // Create load nodes to retrieve arguments from the stack.
4156  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4157  InVals.push_back(
4158  DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4159  }
4160  }
4161 
4162  // Assign locations to all of the incoming aggregate by value arguments.
4163  // Aggregates passed by value are stored in the local variable space of the
4164  // caller's stack frame, right above the parameter list area.
4165  SmallVector<CCValAssign, 16> ByValArgLocs;
4166  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4167  ByValArgLocs, *DAG.getContext());
4168 
4169  // Reserve stack space for the allocations in CCInfo.
4170  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
4171 
4172  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4173 
4174  // Area that is at least reserved in the caller of this function.
4175  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
4176  MinReservedArea = std::max(MinReservedArea, LinkageSize);
4177 
4178  // Set the size that is at least reserved in caller of this function. Tail
4179  // call optimized function's reserved stack space needs to be aligned so that
4180  // taking the difference between two stack areas will result in an aligned
4181  // stack.
4182  MinReservedArea =
4183  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4184  FuncInfo->setMinReservedArea(MinReservedArea);
4185 
4186  SmallVector<SDValue, 8> MemOps;
4187 
4188  // If the function takes variable number of arguments, make a frame index for
4189  // the start of the first vararg value... for expansion of llvm.va_start.
4190  if (isVarArg) {
4191  static const MCPhysReg GPArgRegs[] = {
4192  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4193  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4194  };
4195  const unsigned NumGPArgRegs = std::size(GPArgRegs);
4196 
4197  static const MCPhysReg FPArgRegs[] = {
4198  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4199  PPC::F8
4200  };
4201  unsigned NumFPArgRegs = std::size(FPArgRegs);
4202 
4203  if (useSoftFloat() || hasSPE())
4204  NumFPArgRegs = 0;
4205 
4206  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4207  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4208 
4209  // Make room for NumGPArgRegs and NumFPArgRegs.
4210  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4211  NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4212 
4213  FuncInfo->setVarArgsStackOffset(
4214  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4215  CCInfo.getNextStackOffset(), true));
4216 
4217  FuncInfo->setVarArgsFrameIndex(
4218  MFI.CreateStackObject(Depth, Align(8), false));
4219  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4220 
4221  // The fixed integer arguments of a variadic function are stored to the
4222  // VarArgsFrameIndex on the stack so that they may be loaded by
4223  // dereferencing the result of va_next.
4224  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4225  // Get an existing live-in vreg, or add a new one.
4226  Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4227  if (!VReg)
4228  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4229 
4230  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4231  SDValue Store =
4232  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4233  MemOps.push_back(Store);
4234  // Increment the address by four for the next argument to store
4235  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4236  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4237  }
4238 
4239  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4240  // is set.
4241  // The double arguments are stored to the VarArgsFrameIndex
4242  // on the stack.
4243  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4244  // Get an existing live-in vreg, or add a new one.
4245  Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4246  if (!VReg)
4247  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4248 
4249  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4250  SDValue Store =
4251  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4252  MemOps.push_back(Store);
4253  // Increment the address by eight for the next argument to store
4254  SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4255  PtrVT);
4256  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4257  }
4258  }
4259 
4260  if (!MemOps.empty())
4261  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4262 
4263  return Chain;
4264 }
4265 
4266 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4267 // value to MVT::i64 and then truncate to the correct register size.
4268 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4269  EVT ObjectVT, SelectionDAG &DAG,
4270  SDValue ArgVal,
4271  const SDLoc &dl) const {
4272  if (Flags.isSExt())
4273  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4274  DAG.getValueType(ObjectVT));
4275  else if (Flags.isZExt())
4276  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4277  DAG.getValueType(ObjectVT));
4278 
4279  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4280 }
4281 
4282 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4283  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4284  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4285  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4286  // TODO: add description of PPC stack frame format, or at least some docs.
4287  //
4288  bool isELFv2ABI = Subtarget.isELFv2ABI();
4289  bool isLittleEndian = Subtarget.isLittleEndian();
4290  MachineFunction &MF = DAG.getMachineFunction();
4291  MachineFrameInfo &MFI = MF.getFrameInfo();
4292  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4293 
4294  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4295  "fastcc not supported on varargs functions");
4296 
4297  EVT PtrVT = getPointerTy(MF.getDataLayout());
4298  // Potential tail calls could cause overwriting of argument stack slots.
4299  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4300  (CallConv == CallingConv::Fast));
4301  unsigned PtrByteSize = 8;
4302  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4303 
4304  static const MCPhysReg GPR[] = {
4305  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4306  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4307  };
4308  static const MCPhysReg VR[] = {
4309  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4310  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4311  };
4312 
4313  const unsigned Num_GPR_Regs = std::size(GPR);
4314  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4315  const unsigned Num_VR_Regs = std::size(VR);
4316 
4317  // Do a first pass over the arguments to determine whether the ABI
4318  // guarantees that our caller has allocated the parameter save area
4319  // on its stack frame. In the ELFv1 ABI, this is always the case;
4320  // in the ELFv2 ABI, it is true if this is a vararg function or if
4321  // any parameter is located in a stack slot.
4322 
4323  bool HasParameterArea = !isELFv2ABI || isVarArg;
4324  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4325  unsigned NumBytes = LinkageSize;
4326  unsigned AvailableFPRs = Num_FPR_Regs;
4327  unsigned AvailableVRs = Num_VR_Regs;
4328  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4329  if (Ins[i].Flags.isNest())
4330  continue;
4331 
4332  if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4333  PtrByteSize, LinkageSize, ParamAreaSize,
4334  NumBytes, AvailableFPRs, AvailableVRs))
4335  HasParameterArea = true;
4336  }
4337 
4338  // Add DAG nodes to load the arguments or copy them out of registers. On
4339  // entry to a function on PPC, the arguments start after the linkage area,
4340  // although the first ones are often in registers.
4341 
4342  unsigned ArgOffset = LinkageSize;
4343  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4344  SmallVector<SDValue, 8> MemOps;
4346  unsigned CurArgIdx = 0;
4347  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4348  SDValue ArgVal;
4349  bool needsLoad = false;
4350  EVT ObjectVT = Ins[ArgNo].VT;
4351  EVT OrigVT = Ins[ArgNo].ArgVT;
4352  unsigned ObjSize = ObjectVT.getStoreSize();
4353  unsigned ArgSize = ObjSize;
4354  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4355  if (Ins[ArgNo].isOrigArg()) {
4356  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4357  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4358  }
4359  // We re-align the argument offset for each argument, except when using the
4360  // fast calling convention, when we need to make sure we do that only when
4361  // we'll actually use a stack slot.
4362  unsigned CurArgOffset;
4363  Align Alignment;
4364  auto ComputeArgOffset = [&]() {
4365  /* Respect alignment of argument on the stack. */
4366  Alignment =
4367  CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4368  ArgOffset = alignTo(ArgOffset, Alignment);
4369  CurArgOffset = ArgOffset;
4370  };
4371 
4372  if (CallConv != CallingConv::Fast) {
4373  ComputeArgOffset();
4374 
4375  /* Compute GPR index associated with argument offset. */
4376  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4377  GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4378  }
4379 
4380  // FIXME the codegen can be much improved in some cases.
4381  // We do not have to keep everything in memory.
4382  if (Flags.isByVal()) {
4383  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4384 
4385  if (CallConv == CallingConv::Fast)
4386  ComputeArgOffset();
4387 
4388  // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4389  ObjSize = Flags.getByValSize();
4390  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4391  // Empty aggregate parameters do not take up registers. Examples:
4392  // struct { } a;
4393  // union { } b;
4394  // int c[0];
4395  // etc. However, we have to provide a place-holder in InVals, so
4396  // pretend we have an 8-byte item at the current address for that
4397  // purpose.
4398  if (!ObjSize) {
4399  int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4400  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4401  InVals.push_back(FIN);
4402  continue;
4403  }
4404 
4405  // Create a stack object covering all stack doublewords occupied
4406  // by the argument. If the argument is (fully or partially) on
4407  // the stack, or if the argument is fully in registers but the
4408  // caller has allocated the parameter save anyway, we can refer
4409  // directly to the caller's stack frame. Otherwise, create a
4410  // local copy in our own frame.
4411  int FI;
4412  if (HasParameterArea ||
4413  ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4414  FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4415  else
4416  FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4417  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4418 
4419  // Handle aggregates smaller than 8 bytes.
4420  if (ObjSize < PtrByteSize) {
4421  // The value of the object is its address, which differs from the
4422  // address of the enclosing doubleword on big-endian systems.
4423  SDValue Arg = FIN;
4424  if (!isLittleEndian) {
4425  SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4426  Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4427  }
4428  InVals.push_back(Arg);
4429 
4430  if (GPR_idx != Num_GPR_Regs) {
4431  Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4432  FuncInfo->addLiveInAttr(VReg, Flags);
4433  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4434  EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4435  SDValue Store =
4436  DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4437  MachinePointerInfo(&*FuncArg), ObjType);
4438  MemOps.push_back(Store);
4439  }
4440  // Whether we copied from a register or not, advance the offset
4441  // into the parameter save area by a full doubleword.
4442  ArgOffset += PtrByteSize;
4443  continue;
4444  }
4445 
4446  // The value of the object is its address, which is the address of
4447  // its first stack doubleword.
4448  InVals.push_back(FIN);
4449 
4450  // Store whatever pieces of the object are in registers to memory.
4451  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4452  if (GPR_idx == Num_GPR_Regs)
4453  break;
4454 
4455  Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4456  FuncInfo->addLiveInAttr(VReg, Flags);
4457  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4458  SDValue Addr = FIN;
4459  if (j) {
4460  SDValue Off = DAG.getConstant(j, dl, PtrVT);
4461  Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4462  }
4463  unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4464  EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4465  SDValue Store =
4466  DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4467  MachinePointerInfo(&*FuncArg, j), ObjType);
4468  MemOps.push_back(Store);
4469  ++GPR_idx;
4470  }
4471  ArgOffset += ArgSize;
4472  continue;
4473  }
4474 
4475  switch (ObjectVT.getSimpleVT().SimpleTy) {
4476  default: llvm_unreachable("Unhandled argument type!");
4477  case MVT::i1:
4478  case MVT::i32:
4479  case MVT::i64:
4480  if (Flags.isNest()) {
4481  // The 'nest' parameter, if any, is passed in R11.
4482  Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4483  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4484 
4485  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4486  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4487 
4488  break;
4489  }
4490 
4491  // These can be scalar arguments or elements of an integer array type
4492  // passed directly. Clang may use those instead of "byval" aggregate
4493  // types to avoid forcing arguments to memory unnecessarily.
4494  if (GPR_idx != Num_GPR_Regs) {
4495  Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4496  FuncInfo->addLiveInAttr(VReg, Flags);
4497  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4498 
4499  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4500  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4501  // value to MVT::i64 and then truncate to the correct register size.
4502  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4503  } else {
4504  if (CallConv == CallingConv::Fast)
4505  ComputeArgOffset();
4506 
4507  needsLoad = true;
4508  ArgSize = PtrByteSize;
4509  }
4510  if (CallConv != CallingConv::Fast || needsLoad)
4511  ArgOffset += 8;
4512  break;
4513 
4514  case MVT::f32:
4515  case MVT::f64:
4516  // These can be scalar arguments or elements of a float array type
4517  // passed directly. The latter are used to implement ELFv2 homogenous
4518  // float aggregates.
4519  if (FPR_idx != Num_FPR_Regs) {
4520  unsigned VReg;
4521 
4522  if (ObjectVT == MVT::f32)
4523  VReg = MF.addLiveIn(FPR[FPR_idx],
4524  Subtarget.hasP8Vector()
4525  ? &PPC::VSSRCRegClass
4526  : &PPC::F4RCRegClass);
4527  else
4528  VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4529  ? &PPC::VSFRCRegClass
4530  : &PPC::F8RCRegClass);
4531 
4532  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4533  ++FPR_idx;
4534  } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4535  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4536  // once we support fp <-> gpr moves.
4537 
4538  // This can only ever happen in the presence of f32 array types,
4539  // since otherwise we never run out of FPRs before running out
4540  // of GPRs.
4541  Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4542  FuncInfo->addLiveInAttr(VReg, Flags);
4543  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4544 
4545  if (ObjectVT == MVT::f32) {
4546  if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4547  ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4548  DAG.getConstant(32, dl, MVT::i32));
4549  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4550  }
4551 
4552  ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4553  } else {
4554  if (CallConv == CallingConv::Fast)
4555  ComputeArgOffset();
4556 
4557  needsLoad = true;
4558  }
4559 
4560  // When passing an array of floats, the array occupies consecutive
4561  // space in the argument area; only round up to the next doubleword
4562  // at the end of the array. Otherwise, each float takes 8 bytes.
4563  if (CallConv != CallingConv::Fast || needsLoad) {
4564  ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4565  ArgOffset += ArgSize;
4566  if (Flags.isInConsecutiveRegsLast())
4567  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4568  }
4569  break;
4570  case MVT::v4f32:
4571  case MVT::v4i32:
4572  case MVT::v8i16:
4573  case MVT::v16i8:
4574  case MVT::v2f64:
4575  case MVT::v2i64:
4576  case MVT::v1i128:
4577  case MVT::f128:
4578  // These can be scalar arguments or elements of a vector array type
4579  // passed directly. The latter are used to implement ELFv2 homogenous
4580  // vector aggregates.
4581  if (VR_idx != Num_VR_Regs) {
4582  Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4583  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4584  ++VR_idx;
4585  } else {
4586  if (CallConv == CallingConv::Fast)
4587  ComputeArgOffset();
4588  needsLoad = true;
4589  }
4590  if (CallConv != CallingConv::Fast || needsLoad)
4591  ArgOffset += 16;
4592  break;
4593  }
4594 
4595  // We need to load the argument to a virtual register if we determined
4596  // above that we ran out of physical registers of the appropriate type.
4597  if (needsLoad) {
4598  if (ObjSize < ArgSize && !isLittleEndian)
4599  CurArgOffset += ArgSize - ObjSize;
4600  int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4601  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4602  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4603  }
4604 
4605  InVals.push_back(ArgVal);
4606  }
4607 
4608  // Area that is at least reserved in the caller of this function.
4609  unsigned MinReservedArea;
4610  if (HasParameterArea)
4611  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4612  else
4613  MinReservedArea = LinkageSize;
4614 
4615  // Set the size that is at least reserved in caller of this function. Tail
4616  // call optimized functions' reserved stack space needs to be aligned so that
4617  // taking the difference between two stack areas will result in an aligned
4618  // stack.
4619  MinReservedArea =
4620  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4621  FuncInfo->setMinReservedArea(MinReservedArea);
4622 
4623  // If the function takes variable number of arguments, make a frame index for
4624  // the start of the first vararg value... for expansion of llvm.va_start.
4625  // On ELFv2ABI spec, it writes:
4626  // C programs that are intended to be *portable* across different compilers
4627  // and architectures must use the header file <stdarg.h> to deal with variable
4628  // argument lists.
4629  if (isVarArg && MFI.hasVAStart()) {
4630  int Depth = ArgOffset;
4631 
4632  FuncInfo->setVarArgsFrameIndex(
4633  MFI.CreateFixedObject(PtrByteSize, Depth, true));
4634  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4635 
4636  // If this function is vararg, store any remaining integer argument regs
4637  // to their spots on the stack so that they may be loaded by dereferencing
4638  // the result of va_next.
4639  for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4640  GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4641  Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4642  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4643  SDValue Store =
4644  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4645  MemOps.push_back(Store);
4646  // Increment the address by four for the next argument to store
4647  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4648  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4649  }
4650  }
4651 
4652  if (!MemOps.empty())
4653  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4654 
4655  return Chain;
4656 }
4657 
4658 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4659 /// adjusted to accommodate the arguments for the tailcall.
4660 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4661  unsigned ParamSize) {
4662 
4663  if (!isTailCall) return 0;
4664 
4666  unsigned CallerMinReservedArea = FI->getMinReservedArea();
4667  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4668  // Remember only if the new adjustment is bigger.
4669  if (SPDiff < FI->getTailCallSPDelta())
4670  FI->setTailCallSPDelta(SPDiff);
4671 
4672  return SPDiff;
4673 }
4674 
4675 static bool isFunctionGlobalAddress(SDValue Callee);
4676 
4677 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4678  const TargetMachine &TM) {
4679  // It does not make sense to call callsShareTOCBase() with a caller that
4680  // is PC Relative since PC Relative callers do not have a TOC.
4681 #ifndef NDEBUG
4682  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4683  assert(!STICaller->isUsingPCRelativeCalls() &&
4684  "PC Relative callers do not have a TOC and cannot share a TOC Base");
4685 #endif
4686 
4687  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4688  // don't have enough information to determine if the caller and callee share
4689  // the same TOC base, so we have to pessimistically assume they don't for
4690  // correctness.
4691  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4692  if (!G)
4693  return false;
4694 
4695  const GlobalValue *GV = G->getGlobal();
4696 
4697  // If the callee is preemptable, then the static linker will use a plt-stub
4698  // which saves the toc to the stack, and needs a nop after the call
4699  // instruction to convert to a toc-restore.
4700  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4701  return false;
4702 
4703  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4704  // We may need a TOC restore in the situation where the caller requires a
4705  // valid TOC but the callee is PC Relative and does not.
4706  const Function *F = dyn_cast<Function>(GV);
4707  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4708 
4709  // If we have an Alias we can try to get the function from there.
4710  if (Alias) {
4711  const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4712  F = dyn_cast<Function>(GlobalObj);
4713  }
4714 
4715  // If we still have no valid function pointer we do not have enough
4716  // information to determine if the callee uses PC Relative calls so we must
4717  // assume that it does.
4718  if (!F)
4719  return false;
4720 
4721  // If the callee uses PC Relative we cannot guarantee that the callee won't
4722  // clobber the TOC of the caller and so we must assume that the two
4723  // functions do not share a TOC base.
4724  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4725  if (STICallee->isUsingPCRelativeCalls())
4726  return false;
4727 
4728  // If the GV is not a strong definition then we need to assume it can be
4729  // replaced by another function at link time. The function that replaces
4730  // it may not share the same TOC as the caller since the callee may be
4731  // replaced by a PC Relative version of the same function.
4732  if (!GV->isStrongDefinitionForLinker())
4733  return false;
4734 
4735  // The medium and large code models are expected to provide a sufficiently
4736  // large TOC to provide all data addressing needs of a module with a
4737  // single TOC.
4738  if (CodeModel::Medium == TM.getCodeModel() ||
4739  CodeModel::Large == TM.getCodeModel())
4740  return true;
4741 
4742  // Any explicitly-specified sections and section prefixes must also match.
4743  // Also, if we're using -ffunction-sections, then each function is always in
4744  // a different section (the same is true for COMDAT functions).
4745  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4746  GV->getSection() != Caller->getSection())
4747  return false;
4748  if (const auto *F = dyn_cast<Function>(GV)) {
4749  if (F->getSectionPrefix() != Caller->getSectionPrefix())
4750  return false;
4751  }
4752 
4753  return true;
4754 }
4755 
4756 static bool
4758  const SmallVectorImpl<ISD::OutputArg> &Outs) {
4759  assert(Subtarget.is64BitELFABI());
4760 
4761  const unsigned PtrByteSize = 8;
4762  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4763 
4764  static const MCPhysReg GPR[] = {
4765  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4766  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4767  };
4768  static const MCPhysReg VR[] = {
4769  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4770  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4771  };
4772 
4773  const unsigned NumGPRs = std::size(GPR);
4774  const unsigned NumFPRs = 13;
4775  const unsigned NumVRs = std::size(VR);
4776  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4777 
4778  unsigned NumBytes = LinkageSize;
4779  unsigned AvailableFPRs = NumFPRs;
4780  unsigned AvailableVRs = NumVRs;
4781 
4782  for (const ISD::OutputArg& Param : Outs) {
4783  if (Param.Flags.isNest()) continue;
4784 
4785  if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4786  LinkageSize, ParamAreaSize, NumBytes,
4787  AvailableFPRs, AvailableVRs))
4788  return true;
4789  }
4790  return false;
4791 }
4792 
4793 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4794  if (CB.arg_size() != CallerFn->arg_size())
4795  return false;
4796 
4797  auto CalleeArgIter = CB.arg_begin();
4798  auto CalleeArgEnd = CB.arg_end();
4799  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4800 
4801  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4802  const Value* CalleeArg = *CalleeArgIter;
4803  const Value* CallerArg = &(*CallerArgIter);
4804  if (CalleeArg == CallerArg)
4805  continue;
4806 
4807  // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4808  // tail call @callee([4 x i64] undef, [4 x i64] %b)
4809  // }
4810  // 1st argument of callee is undef and has the same type as caller.
4811  if (CalleeArg->getType() == CallerArg->getType() &&
4812  isa<UndefValue>(CalleeArg))
4813  continue;
4814 
4815  return false;
4816  }
4817 
4818  return true;
4819 }
4820 
4821 // Returns true if TCO is possible between the callers and callees
4822 // calling conventions.
4823 static bool
4825  CallingConv::ID CalleeCC) {
4826  // Tail calls are possible with fastcc and ccc.
4827  auto isTailCallableCC = [] (CallingConv::ID CC){
4828  return CC == CallingConv::C || CC == CallingConv::Fast;
4829  };
4830  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4831  return false;
4832 
4833  // We can safely tail call both fastcc and ccc callees from a c calling
4834  // convention caller. If the caller is fastcc, we may have less stack space
4835  // than a non-fastcc caller with the same signature so disable tail-calls in
4836  // that case.
4837  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4838 }
4839 
4840 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4841  SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4842  const SmallVectorImpl<ISD::OutputArg> &Outs,
4843  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4844  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4845 
4846  if (DisableSCO && !TailCallOpt) return false;
4847 
4848  // Variadic argument functions are not supported.
4849  if (isVarArg) return false;
4850 
4851  auto &Caller = DAG.getMachineFunction().getFunction();
4852  // Check that the calling conventions are compatible for tco.
4853  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4854  return false;
4855 
4856  // Caller contains any byval parameter is not supported.
4857  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4858  return false;
4859 
4860  // Callee contains any byval parameter is not supported, too.
4861  // Note: This is a quick work around, because in some cases, e.g.
4862  // caller's stack size > callee's stack size, we are still able to apply
4863  // sibling call optimization. For example, gcc is able to do SCO for caller1
4864  // in the following example, but not for caller2.
4865  // struct test {
4866  // long int a;
4867  // char ary[56];
4868  // } gTest;
4869  // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4870  // b->a = v.a;
4871  // return 0;
4872  // }
4873  // void caller1(struct test a, struct test c, struct test *b) {
4874  // callee(gTest, b); }
4875  // void caller2(struct test *b) { callee(gTest, b); }
4876  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4877  return false;
4878 
4879  // If callee and caller use different calling conventions, we cannot pass
4880  // parameters on stack since offsets for the parameter area may be different.
4881  if (Caller.getCallingConv() != CalleeCC &&
4882  needStackSlotPassParameters(Subtarget, Outs))
4883  return false;
4884 
4885  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4886  // the caller and callee share the same TOC for TCO/SCO. If the caller and
4887  // callee potentially have different TOC bases then we cannot tail call since
4888  // we need to restore the TOC pointer after the call.
4889  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4890  // We cannot guarantee this for indirect calls or calls to external functions.
4891  // When PC-Relative addressing is used, the concept of the TOC is no longer
4892  // applicable so this check is not required.
4893  // Check first for indirect calls.
4894  if (!Subtarget.isUsingPCRelativeCalls() &&
4895  !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4896  return false;
4897 
4898  // Check if we share the TOC base.
4899  if (!Subtarget.isUsingPCRelativeCalls() &&
4900  !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4901  return false;
4902 
4903  // TCO allows altering callee ABI, so we don't have to check further.
4904  if (CalleeCC == CallingConv::Fast && TailCallOpt)
4905  return true;
4906 
4907  if (DisableSCO) return false;
4908 
4909  // If callee use the same argument list that caller is using, then we can
4910  // apply SCO on this case. If it is not, then we need to check if callee needs
4911  // stack for passing arguments.
4912  // PC Relative tail calls may not have a CallBase.
4913  // If there is no CallBase we cannot verify if we have the same argument
4914  // list so assume that we don't have the same argument list.
4915  if (CB && !hasSameArgumentList(&Caller, *CB) &&
4916  needStackSlotPassParameters(Subtarget, Outs))
4917  return false;
4918  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4919  return false;
4920 
4921  return true;
4922 }
4923 
4924 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4925 /// for tail call optimization. Targets which want to do tail call
4926 /// optimization should implement this function.
4927 bool
4928 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4929  CallingConv::ID CalleeCC,
4930  bool isVarArg,
4932  SelectionDAG& DAG) const {
4933  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4934  return false;
4935 
4936  // Variable argument functions are not supported.
4937  if (isVarArg)
4938  return false;
4939 
4940  MachineFunction &MF = DAG.getMachineFunction();
4941  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4942  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4943  // Functions containing by val parameters are not supported.
4944  for (unsigned i = 0; i != Ins.size(); i++) {
4945  ISD::ArgFlagsTy Flags = Ins[i].Flags;
4946  if (Flags.isByVal()) return false;
4947  }
4948 
4949  // Non-PIC/GOT tail calls are supported.
4950  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4951  return true;
4952 
4953  // At the moment we can only do local tail calls (in same module, hidden
4954  // or protected) if we are generating PIC.
4955  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4956  return G->getGlobal()->hasHiddenVisibility()
4957  || G->getGlobal()->hasProtectedVisibility();
4958  }
4959 
4960  return false;
4961 }
4962 
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
/// Returns a constant node holding the word offset (Addr >> 2) on success,
/// or nullptr when the value is not a 4-byte-aligned, 26-bit sign-extended
/// immediate.
// NOTE(review): the function signature line (taking `SDValue Op` and
// `SelectionDAG &DAG`) is missing from this excerpt — verify upstream.
  // Only a constant callee can possibly be encoded as a BLA immediate.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr; // Top 6 bits have to be sext of immediate.

  // Encode as a word (instruction-count) offset, as the hardware expects.
  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          // NOTE(review): the result-type argument line is missing from this
          // excerpt — verify against upstream.
      .getNode();
}
4980 
namespace {

/// Records everything needed to later store a tail-call argument into its
/// final stack slot: the value itself, the frame-index node used as the
/// store address, and the raw frame index.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to be stored.
  SDValue FrameIdxOp; // Frame-index SDValue used as the store address.
  int FrameIdx = 0;   // The underlying fixed-object frame index.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
4992 
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
/// One store is emitted per recorded argument; the stores are collected in
/// \p MemOpChains so the caller can token-factor them together.
// NOTE(review): the signature line of this static helper is missing from
// this excerpt — verify against upstream.
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        // NOTE(review): the MachinePointerInfo argument line is missing from
        // this excerpt — verify against upstream.
  }
}
5008 
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
/// Returns the updated chain. A zero \p SPDiff means no frame-size change, so
/// nothing needs to move.
// NOTE(review): the signature line of this helper is missing from this
// excerpt — verify against upstream.
    SDValue OldRetAddr, SDValue OldFP,
    int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    bool isPPC64 = Subtarget.isPPC64();
    // Pointer-sized slot: 8 bytes on PPC64, 4 on PPC32.
    int SlotSize = isPPC64 ? 8 : 4;
    // The LR save slot of the adjusted frame is the ABI return-save offset
    // shifted by the caller/callee stack delta.
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
  }
  return Chain;
}
5031 
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
// NOTE(review): the first signature line (DAG, MF and isPPC64 parameters) is
// missing from this excerpt — verify against upstream.
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                         SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  // The argument's final home is its normal offset shifted by the stack
  // delta between caller and callee frames.
  int Offset = ArgOffset + SPDiff;
  // Round the value's size in bits up to whole bytes.
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  // Record everything needed so StoreTailCallArgumentsToStackSlot can emit
  // the actual store later.
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
5049 
5050 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5051 /// stack slot. Returns the chain as result and the loaded frame pointers in
5052 /// LROpOut/FPOpout. Used when tail calling.
5053 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5054  SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5055  SDValue &FPOpOut, const SDLoc &dl) const {
5056  if (SPDiff) {
5057  // Load the LR and FP stack slot for later adjusting.
5058  EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5059  LROpOut = getReturnAddrFrameIndex(DAG);
5060  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5061  Chain = SDValue(LROpOut.getNode(), 1);
5062  }
5063  return Chain;
5064 }
5065 
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
// NOTE(review): the signature line (Src/Dst parameters) is missing from this
// excerpt — verify against upstream.
                          SDValue Chain, ISD::ArgFlagsTy Flags,
                          SelectionDAG &DAG, const SDLoc &dl) {
  // The number of bytes to copy comes from the byval attribute.
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                       Flags.getNonZeroByValAlign(), false, false, false,
                       // NOTE(review): the trailing argument line of this call
                       // is missing from this excerpt — verify upstream.
}
5080 
5081 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5082 /// tail calls.
5083 static void LowerMemOpCallTo(
5084  SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5085  SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5086  bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5087  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5088  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5089  if (!isTailCall) {
5090  if (isVector) {
5091  SDValue StackPtr;
5092  if (isPPC64)
5093  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5094  else
5095  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5096  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5097  DAG.getConstant(ArgOffset, dl, PtrVT));
5098  }
5099  MemOpChains.push_back(
5100  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5101  // Calculate and remember argument location.
5102  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5103  TailCallArguments);
5104 }
5105 
static void
// NOTE(review): the signature line of this helper (taking DAG, InFlag and
// Chain by reference) is missing from this excerpt — verify against upstream.
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  // Merge all argument stores into one chain node.
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, dl);
  InFlag = Chain.getValue(1);
}
5128 
5129 // Is this global address that of a function that can be called by name? (as
5130 // opposed to something that must hold a descriptor for an indirect call).
5131 static bool isFunctionGlobalAddress(SDValue Callee) {
5132  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5133  if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5134  Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5135  return false;
5136 
5137  return G->getGlobal()->getValueType()->isFunctionTy();
5138  }
5139 
5140  return false;
5141 }
5142 
/// Copy the result values of a completed call out of their assigned physical
/// registers into \p InVals, undoing any promotion the calling convention
/// applied. Returns the updated chain.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // NOTE(review): the declaration line of `RVLocs` is missing from this
  // excerpt — verify against upstream.
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // The cold calling convention on SVR4 uses its own return convention.
  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      // SPE returns f64 in two i32 registers; reassemble the halves.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      // Register order of the halves depends on endianness.
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    // Undo any extension/promotion the calling convention applied.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5206 
// Returns true when this call must go through a register (indirect): the
// callee is neither a named function/external symbol nor an address usable
// by a direct branch.
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
  // PatchPoint calls are not indirect.
  if (isPatchPoint)
    return false;

  if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
  // becuase the immediate function pointer points to a descriptor instead of
  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
  // pointer immediate points to the global entry point, while the BLA would
  // need to jump to the local entry point (see rL211174).
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
      // NOTE(review): the second half of this condition is missing from this
      // excerpt — verify against upstream.
    return false;

  return true;
}
5227 
5228 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5229 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5230  return Subtarget.isAIXABI() ||
5231  (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5232 }
5233 
// Pick the PPCISD call opcode for this call from its flags and the target
// ABI, switching to the rounding-mode-preserving (_RM) variants for
// strict-FP call sites.
// NOTE(review): the first signature line of this static helper is missing
// from this excerpt — verify against upstream.
                              const Function &Caller, const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM,
                              bool IsStrictFPCall = false) {
  // Tail calls always use TC_RETURN regardless of ABI.
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
    // The ABIs that maintain a TOC pointer accross calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
                                                    : PPCISD::CALL_NOP;
  else
    RetOpc = PPCISD::CALL;
  // Strict FP calls map each opcode onto its rounding-mode (_RM) variant.
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
    // NOTE(review): a `case` label (for the BCTRL_LOAD_TOC opcode) appears to
    // be missing from this excerpt — verify against upstream.
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
5294 
// Rewrite the callee node into the form the selected ABI needs: an absolute
// BLA-compatible immediate, an AIX entry-point MCSymbol, a target global
// address (possibly PLT-relocated), or a target external symbol.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    // GlobalIFuncs are never local: they must go through their resolver.
    return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
           !isa_and_nonnull<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      // NOTE(review): the relocation-model term of this condition is missing
      // from this excerpt — verify against upstream.

  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    MCSymbolXCOFF *S =
        cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
    return DAG.getMCSymbol(S, PtrVT);
  };

  // NOTE(review): the guard line opening this block (testing the callee kind)
  // is missing from this excerpt — verify against upstream.
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getMMI().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
            // NOTE(review): the csect-properties argument line is missing
            // from this excerpt — verify against upstream.
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5372 
// Returns the chain result of a CALLSEQ_START node, accounting for an
// optional trailing glue result.
// NOTE(review): the signature line of this static helper is missing from
// this excerpt — verify against upstream.
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  // The last operand is the chain, except when the node has glue. If the node
  // has glue, then the last operand is the glue, and the chain is the second
  // last operand.
  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
  if (LastValue.getValueType() != MVT::Glue)
    return LastValue;

  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
}
5386 
5387 // Creates the node that moves a functions address into the count register
5388 // to prepare for an indirect call instruction.
5389 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5390  SDValue &Glue, SDValue &Chain,
5391  const SDLoc &dl) {
5392  SDValue MTCTROps[] = {Chain, Callee, Glue};
5393  EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5394  Chain = DAG.getNode(PPCISD::MTCTR, dl, ArrayRef(ReturnTypes, 2),
5395  ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5396  // The glue is the second value produced.
5397  Glue = Chain.getValue(1);
5398 }
5399 
// Lower an indirect call through a function descriptor: load the entry point,
// TOC anchor and environment pointer from the descriptor, copy the latter two
// into their ABI registers, then hand the entry point to prepareIndirectCall.
// NOTE(review): the signature line of this helper is missing from this
// excerpt — verify against upstream.
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
  // NOTE(review): the two branches of this conditional (the MachineMemOperand
  // flag sets) are missing from this excerpt — verify against upstream.

  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5491 
// Assemble the operand list for the final call node: chain, callee (or the
// indirect-call bookkeeping operands), tail-call delta, argument registers,
// TOC register, CR bit for 32-bit varargs, register mask, and trailing glue.
static void
// NOTE(review): the first signature line (the operand-vector parameter) is
// missing from this excerpt — verify against upstream.
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
                                    RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
      !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      // NOTE(review): the initializer line (querying the call-preserved mask
      // from TRI) is missing from this excerpt — verify against upstream.
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
5574 
/// Completes lowering of a call: selects the call opcode, transforms the
/// callee for the target ABI (or prepares the indirect-call sequence), emits
/// the call (or TC_RETURN) node plus CALLSEQ_END, and lowers the results.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // Record that this function uses the TOC base pointer where the ABI
  // maintains one across calls.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);

  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  // NOTE(review): the declaration line of the operand vector `Ops` is missing
  // from this excerpt — verify against upstream.
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    // NOTE(review): a statement line preceding this return is missing from
    // this excerpt — verify against upstream.
    return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
  }

  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                         // NOTE(review): the second term of this condition is
                         // missing from this excerpt — verify upstream.
                             ? NumBytes
                             : 0;

  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5642 
/// Top-level call lowering entry point: decides tail-call eligibility, builds
/// the CallFlags, then dispatches to the ABI-specific LowerCall_* routine.
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  // NOTE(review): the declaration line of `Outs` (from CLI) is missing from
  // this excerpt — verify against upstream.
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  // NOTE(review): the declaration line of `Ins` (from CLI) is missing from
  // this excerpt — verify against upstream.
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    // -mlongcall forces calls through a pointer, which rules out tail calls
    // unless the call site is musttail.
    if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall = IsEligibleForTailCallOptimization_64SVR4(
          Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  // Dispatch to the ABI-specific lowering routine.
  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5719 
5720 SDValue PPCTargetLowering::LowerCall_32SVR4(
5721  SDValue Chain, SDValue Callee, CallFlags CFlags,
5722  const SmallVectorImpl<ISD::OutputArg> &Outs,
5723  const SmallVectorImpl<SDValue> &OutVals,
5724  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5725  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5726  const CallBase *CB) const {
5727  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5728  // of the 32-bit SVR4 ABI stack frame layout.
5729 
5730  const CallingConv::ID CallConv = CFlags.CallConv;
5731  const bool IsVarArg = CFlags.IsVarArg;
5732  const bool IsTailCall = CFlags.IsTailCall;
5733 
5734  assert((CallConv == CallingConv::C ||
5735  CallConv == CallingConv::Cold ||
5736  CallConv == CallingConv::Fast) && "Unknown calling convention!");
5737 
5738  const Align PtrAlign(4);
5739 
5740  MachineFunction &MF = DAG.getMachineFunction();
5741 
5742  // Mark this function as potentially containing a function that contains a
5743  // tail call. As a consequence the frame pointer will be used for dynamicalloc
5744  // and restoring the callers stack pointer in this functions epilog. This is
5745  // done because by tail calling the called function might overwrite the value
5746  // in this function's (MF) stack pointer stack slot 0(SP).
5747  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5748  CallConv == CallingConv::Fast)
5749  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5750 
5751  // Count how many bytes are to be pushed on the stack, including the linkage
5752  // area, parameter list area and the part of the local variable space which
5753  // contains copies of aggregates which are passed by value.
5754 
5755  // Assign locations to all of the outgoing arguments.
5757  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5758 
5759  // Reserve space for the linkage area on the stack.
5760  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5761  PtrAlign);
5762  if (useSoftFloat())
5763  CCInfo.PreAnalyzeCallOperands(Outs);
5764 
5765  if (IsVarArg) {
5766  // Handle fixed and variable vector arguments differently.
5767  // Fixed vector arguments go into registers as long as registers are
5768  // available. Variable vector arguments always go into memory.
5769  unsigned NumArgs = Outs.size();
5770 
5771  for (unsigned i = 0; i != NumArgs; ++i) {
5772  MVT ArgVT = Outs[i].VT;
5773  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5774  bool Result;
5775 
5776  if (Outs[i].IsFixed) {
5777  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5778  CCInfo);
5779  } else {
5781  ArgFlags, CCInfo);
5782  }
5783 
5784  if (Result) {
5785 #ifndef NDEBUG
5786  errs() << "Call operand #" << i << " has unhandled type "
5787  << ArgVT << "\n";
5788 #endif
5789  llvm_unreachable(nullptr);
5790  }
5791  }
5792  } else {
5793  // All arguments are treated the same.
5794  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5795  }
5796  CCInfo.clearWasPPCF128();
5797 
5798  // Assign locations to all of the outgoing aggregate by value arguments.
5799  SmallVector<CCValAssign, 16> ByValArgLocs;
5800  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5801 
5802  // Reserve stack space for the allocations in CCInfo.
5803  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5804 
5805  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5806 
5807  // Size of the linkage area, parameter list area and the part of the local
5808  // space variable where copies of aggregates which are passed by value are
5809  // stored.
5810  unsigned NumBytes = CCByValInfo.getNextStackOffset();
5811 
5812  // Calculate by how many bytes the stack has to be adjusted in case of tail
5813  // call optimization.
5814  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5815 
5816  // Adjust the stack pointer for the new arguments...
5817  // These operations are automatically eliminated by the prolog/epilog pass
5818  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5819  SDValue CallSeqStart = Chain;
5820 
5821  // Load the return address and frame pointer so it can be moved somewhere else
5822  // later.
5823  SDValue LROp, FPOp;
5824  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5825 
5826  // Set up a copy of the stack pointer for use loading and storing any
5827  // arguments that may not fit in the registers available for argument
5828  // passing.
5829  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5830 
5832  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5833  SmallVector<SDValue, 8> MemOpChains;
5834 
5835  bool seenFloatArg = false;
5836  // Walk the register/memloc assignments, inserting copies/loads.
5837  // i - Tracks the index into the list of registers allocated for the call
5838  // RealArgIdx - Tracks the index into the list of actual function arguments
5839  // j - Tracks the index into the list of byval arguments
5840  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5841  i != e;
5842  ++i, ++RealArgIdx) {
5843  CCValAssign &VA = ArgLocs[i];
5844  SDValue Arg = OutVals[RealArgIdx];
5845  ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5846 
5847  if (Flags.isByVal()) {
5848  // Argument is an aggregate which is passed by value, thus we need to
5849  // create a copy of it in the local variable space of the current stack
5850  // frame (which is the stack frame of the caller) and pass the address of
5851  // this copy to the callee.
5852  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5853  CCValAssign &ByValVA = ByValArgLocs[j++];
5854  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5855 
5856  // Memory reserved in the local variable space of the callers stack frame.
5857  unsigned LocMemOffset = ByValVA.getLocMemOffset();
5858 
5859  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5860  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5861  StackPtr, PtrOff);
5862 
5863  // Create a copy of the argument in the local area of the current
5864  // stack frame.
5865  SDValue MemcpyCall =
5867  CallSeqStart.getNode()->getOperand(0),
5868  Flags, DAG, dl);
5869 
5870  // This must go outside the CALLSEQ_START..END.
5871  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5872  SDLoc(MemcpyCall));
5873  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5874  NewCallSeqStart.getNode());
5875  Chain = CallSeqStart = NewCallSeqStart;
5876 
5877  // Pass the address of the aggregate copy on the stack either in a
5878  // physical register or in the parameter list area of the current stack
5879  // frame to the callee.
5880  Arg = PtrOff;
5881  }
5882 
5883  // When useCRBits() is true, there can be i1 arguments.
5884  // It is because getRegisterType(MVT::i1) => MVT::i1,
5885  // and for other integer types getRegisterType() => MVT::i32.
5886  // Extend i1 and ensure callee will get i32.
5887  if (Arg.getValueType() == MVT::i1)
5888  Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5889  dl, MVT::i32, Arg);
5890 
5891  if (VA.isRegLoc()) {
5892  seenFloatArg |= VA.getLocVT().isFloatingPoint();
5893  // Put argument in a physical register.
5894  if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5895  bool IsLE = Subtarget.isLittleEndian();
5896  SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5897  DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5898  RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5899  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5900  DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5901  RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5902  SVal.getValue(0)));
5903  } else
5904  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5905  } else {
5906  // Put argument in the parameter list area of the current stack frame.
5907  assert(VA.isMemLoc());
5908  unsigned LocMemOffset = VA.getLocMemOffset();
5909 
5910  if (!IsTailCall) {
5911  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5912  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5913  StackPtr, PtrOff);
5914 
5915  MemOpChains.push_back(
5916  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5917  } else {
5918  // Calculate and remember argument location.
5919  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5920  TailCallArguments);
5921  }
5922  }
5923  }
5924 
5925  if (!MemOpChains.empty())
5926  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5927 
5928  // Build a sequence of copy-to-reg nodes chained together with token chain
5929  // and flag operands which copy the outgoing args into the appropriate regs.
5930  SDValue InFlag;
5931  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5932  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5933  RegsToPass[i].second, InFlag);
5934  InFlag = Chain.getValue(1);
5935  }
5936 
5937  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5938  // registers.
5939  if (IsVarArg) {
5940  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5941  SDValue Ops[] = { Chain, InFlag };
5942 
5943  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
5944  VTs, ArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5945 
5946  InFlag = Chain.getValue(1);
5947  }
5948 
5949  if (IsTailCall)
5950  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5951  TailCallArguments);
5952 
5953  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5954  Callee, SPDiff, NumBytes, Ins, InVals, CB);
5955 }
5956 
5957 // Copy an argument into memory, being careful to do this outside the
5958 // call sequence for the call to which the argument belongs.
5959 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5960  SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5961  SelectionDAG &DAG, const SDLoc &dl) const {
5962  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5963  CallSeqStart.getNode()->getOperand(0),
5964  Flags, DAG, dl);
5965  // The MEMCPY must go outside the CALLSEQ_START..END.
5966  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5967  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5968  SDLoc(MemcpyCall));
5969  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5970  NewCallSeqStart.getNode());
5971  return NewCallSeqStart;
5972 }
5973 
5974 SDValue PPCTargetLowering::LowerCall_64SVR4(
5975  SDValue Chain, SDValue Callee, CallFlags CFlags,
5976  const SmallVectorImpl<ISD::OutputArg> &Outs,
5977  const SmallVectorImpl<SDValue> &OutVals,
5978  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5979  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5980  const CallBase *CB) const {
5981  bool isELFv2ABI = Subtarget.isELFv2ABI();
5982  bool isLittleEndian = Subtarget.isLittleEndian();
5983  unsigned NumOps = Outs.size();
5984  bool IsSibCall = false;
5985  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5986 
5987  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5988  unsigned PtrByteSize = 8;
5989 
5990  MachineFunction &MF = DAG.getMachineFunction();
5991 
5992  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5993  IsSibCall = true;
5994 
5995  // Mark this function as potentially containing a function that contains a
5996  // tail call. As a consequence the frame pointer will be used for dynamicalloc
5997  // and restoring the callers stack pointer in this functions epilog. This is
5998  // done because by tail calling the called function might overwrite the value
5999  // in this function's (MF) stack pointer stack slot 0(SP).
6000  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6001  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6002 
6003  assert(!(IsFastCall && CFlags.IsVarArg) &&
6004  "fastcc not supported on varargs functions");
6005 
6006  // Count how many bytes are to be pushed on the stack, including the linkage
6007  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6008  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6009  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6010  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6011  unsigned NumBytes = LinkageSize;
6012  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6013 
6014  static const MCPhysReg GPR[] = {
6015  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6016  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6017  };
6018  static const MCPhysReg VR[] = {
6019  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6020  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6021  };
6022 
6023  const unsigned NumGPRs = std::size(GPR);
6024  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6025  const unsigned NumVRs = std::size(VR);
6026 
6027  // On ELFv2, we can avoid allocating the parameter area if all the arguments
6028  // can be passed to the callee in registers.
6029  // For the fast calling convention, there is another check below.
6030  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6031  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6032  if (!HasParameterArea) {
6033  unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6034  unsigned AvailableFPRs = NumFPRs;
6035  unsigned AvailableVRs = NumVRs;
6036  unsigned NumBytesTmp = NumBytes;
6037  for (unsigned i = 0; i != NumOps; ++i) {
6038  if (Outs[i].Flags.isNest()) continue;
6039  if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6040  PtrByteSize, LinkageSize, ParamAreaSize,
6041  NumBytesTmp, AvailableFPRs, AvailableVRs))
6042  HasParameterArea = true;
6043  }
6044  }
6045 
6046  // When using the fast calling convention, we don't provide backing for
6047  // arguments that will be in registers.
6048  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6049 
6050  // Avoid allocating parameter area for fastcc functions if all the arguments
6051  // can be passed in the registers.
6052  if (IsFastCall)
6053  HasParameterArea = false;
6054 
6055  // Add up all the space actually used.
6056  for (unsigned i = 0; i != NumOps; ++i) {
6057  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6058  EVT ArgVT = Outs[i].VT;
6059  EVT OrigVT = Outs[i].ArgVT;
6060 
6061  if (Flags.isNest())
6062  continue;
6063 
6064  if (IsFastCall) {
6065  if (Flags.isByVal()) {
6066  NumGPRsUsed += (Flags.getByValSize()+7)/8;
6067  if (NumGPRsUsed > NumGPRs)
6068  HasParameterArea = true;
6069  } else {
6070  switch (ArgVT.getSimpleVT().SimpleTy) {
6071  default: llvm_unreachable("Unexpected ValueType for argument!");
6072  case MVT::i1:
6073  case MVT::i32:
6074  case MVT::i64:
6075  if (++NumGPRsUsed <= NumGPRs)
6076  continue;
6077  break;
6078  case MVT::v4i32:
6079  case MVT::v8i16:
6080  case MVT::v16i8:
6081  case MVT::v2f64:
6082  case MVT::v2i64:
6083  case MVT::v1i128:
6084  case MVT::f128:
6085  if (++NumVRsUsed <= NumVRs)
6086  continue;
6087  break;
6088  case MVT::v4f32:
6089  if (++NumVRsUsed <= NumVRs)
6090  continue;
6091  break;
6092  case MVT::f32:
6093  case MVT::f64:
6094  if (++NumFPRsUsed <= NumFPRs)
6095  continue;
6096  break;
6097  }
6098  HasParameterArea = true;
6099  }
6100  }
6101 
6102  /* Respect alignment of argument on the stack. */
6103  auto Alignement =
6104  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6105  NumBytes = alignTo(NumBytes, Alignement);
6106 
6107  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6108  if (Flags.isInConsecutiveRegsLast())
6109  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6110  }
6111 
6112  unsigned NumBytesActuallyUsed = NumBytes;
6113 
6114  // In the old ELFv1 ABI,
6115  // the prolog code of the callee may store up to 8 GPR argument registers to
6116  // the stack, allowing va_start to index over them in memory if its varargs.
6117  // Because we cannot tell if this is needed on the caller side, we have to
6118  // conservatively assume that it is needed. As such, make sure we have at
6119  // least enough stack space for the caller to store the 8 GPRs.
6120  // In the ELFv2 ABI, we allocate the parameter area iff a callee
6121  // really requires memory operands, e.g. a vararg function.
6122  if (HasParameterArea)
6123  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6124  else
6125  NumBytes = LinkageSize;
6126 
6127  // Tail call needs the stack to be aligned.
6128  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6129  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6130 
6131  int SPDiff = 0;
6132 
6133  // Calculate by how many bytes the stack has to be adjusted in case of tail
6134  // call optimization.
6135  if (!IsSibCall)
6136  SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6137 
6138  // To protect arguments on the stack from being clobbered in a tail call,
6139  // force all the loads to happen before doing any other lowering.
6140  if (CFlags.IsTailCall)
6141  Chain = DAG.getStackArgumentTokenFactor(Chain);
6142 
6143  // Adjust the stack pointer for the new arguments...
6144  // These operations are automatically eliminated by the prolog/epilog pass
6145  if (!IsSibCall)
6146  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6147  SDValue CallSeqStart = Chain;
6148 
6149  // Load the return address and frame pointer so it can be move somewhere else
6150  // later.
6151  SDValue LROp, FPOp;
6152  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6153 
6154  // Set up a copy of the stack pointer for use loading and storing any
6155  // arguments that may not fit in the registers available for argument
6156  // passing.
6157  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6158 
6159  // Figure out which arguments are going to go in registers, and which in
6160  // memory. Also, if this is a vararg function, floating point operations
6161  // must be stored to our stack, and loaded into integer regs as well, if
6162  // any integer regs are available for argument passing.
6163  unsigned ArgOffset = LinkageSize;
6164 
6166  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6167 
6168  SmallVector<SDValue, 8> MemOpChains;
6169  for (unsigned i = 0; i != NumOps; ++i) {
6170  SDValue Arg = OutVals[i];
6171  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6172  EVT ArgVT = Outs[i].VT;
6173  EVT OrigVT = Outs[i].ArgVT;
6174 
6175  // PtrOff will be used to store the current argument to the stack if a
6176  // register cannot be found for it.
6177  SDValue PtrOff;
6178 
6179  // We re-align the argument offset for each argument, except when using the
6180  // fast calling convention, when we need to make sure we do that only when
6181  // we'll actually use a stack slot.
6182  auto ComputePtrOff = [&]() {
6183  /* Respect alignment of argument on the stack. */
6184  auto Alignment =
6185  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6186  ArgOffset = alignTo(ArgOffset, Alignment);
6187 
6188  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6189 
6190  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6191  };
6192 
6193  if (!IsFastCall) {
6194  ComputePtrOff();
6195 
6196  /* Compute GPR index associated with argument offset. */
6197  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6198  GPR_idx = std::min(GPR_idx, NumGPRs);
6199  }
6200 
6201  // Promote integers to 64-bit values.
6202  if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6203  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6204  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6205  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6206  }
6207 
6208  // FIXME memcpy is used way more than necessary. Correctness first.
6209  // Note: "by value" is code for passing a structure by value, not
6210  // basic types.
6211  if (Flags.isByVal()) {
6212  // Note: Size includes alignment padding, so
6213  // struct x { short a; char b; }
6214  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6215  // These are the proper values we need for right-justifying the
6216  // aggregate in a parameter register.
6217  unsigned Size = Flags.getByValSize();
6218 
6219  // An empty aggregate parameter takes up no storage and no
6220  // registers.
6221  if (Size == 0)
6222  continue;
6223 
6224  if (IsFastCall)
6225  ComputePtrOff();
6226 
6227  // All aggregates smaller than 8 bytes must be passed right-justified.
6228  if (Size==1 || Size==2 || Size==4) {
6229  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6230  if (GPR_idx != NumGPRs) {
6231  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6232  MachinePointerInfo(), VT);
6233  MemOpChains.push_back(Load.getValue(1));
6234  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6235 
6236  ArgOffset += PtrByteSize;
6237  continue;
6238  }
6239  }
6240 
6241  if (GPR_idx == NumGPRs && Size < 8) {
6242  SDValue AddPtr = PtrOff;
6243  if (!isLittleEndian) {
6244  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6245  PtrOff.getValueType());
6246  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6247  }
6248  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6249  CallSeqStart,
6250  Flags, DAG, dl);
6251  ArgOffset += PtrByteSize;
6252  continue;
6253  }
6254  // Copy the object to parameter save area if it can not be entirely passed
6255  // by registers.
6256  // FIXME: we only need to copy the parts which need to be passed in
6257  // parameter save area. For the parts passed by registers, we don't need
6258  // to copy them to the stack although we need to allocate space for them
6259  // in parameter save area.
6260  if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6261  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6262  CallSeqStart,
6263  Flags, DAG, dl);
6264 
6265  // When a register is available, pass a small aggregate right-justified.
6266  if (Size < 8 && GPR_idx != NumGPRs) {
6267  // The easiest way to get this right-justified in a register
6268  // is to copy the structure into the rightmost portion of a
6269  // local variable slot, then load the whole slot into the
6270  // register.
6271  // FIXME: The memcpy seems to produce pretty awful code for
6272  // small aggregates, particularly for packed ones.
6273  // FIXME: It would be preferable to use the slot in the
6274  // parameter save area instead of a new local variable.
6275  SDValue AddPtr = PtrOff;
6276  if (!isLittleEndian) {
6277  SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6278  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6279  }
6280  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6281  CallSeqStart,
6282  Flags, DAG, dl);
6283 
6284  // Load the slot into the register.
6285  SDValue Load =
6286  DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6287  MemOpChains.push_back(Load.getValue(1));
6288  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6289 
6290  // Done with this argument.
6291  ArgOffset += PtrByteSize;
6292  continue;
6293  }
6294 
6295  // For aggregates larger than PtrByteSize, copy the pieces of the
6296  // object that fit into registers from the parameter save area.
6297  for (unsigned j=0; j<Size; j+=PtrByteSize) {
6298  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6299  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6300  if (GPR_idx != NumGPRs) {
6301  unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6302  EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6303  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6304  MachinePointerInfo(), ObjType);
6305 
6306  MemOpChains.push_back(Load.getValue(1));
6307  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6308  ArgOffset += PtrByteSize;
6309  } else {
6310  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6311  break;
6312  }
6313  }
6314  continue;
6315  }
6316 
6317  switch (Arg.getSimpleValueType().SimpleTy) {
6318  default: llvm_unreachable("Unexpected ValueType for argument!");
6319  case MVT::i1:
6320  case MVT::i32:
6321  case MVT::i64:
6322  if (Flags.isNest()) {
6323  // The 'nest' parameter, if any, is passed in R11.
6324  RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6325  break;
6326  }
6327 
6328  // These can be scalar arguments or elements of an integer array type
6329  // passed directly. Clang may use those instead of "byval" aggregate
6330  // types to avoid forcing arguments to memory unnecessarily.
6331  if (GPR_idx != NumGPRs) {
6332  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6333  } else {
6334  if (IsFastCall)
6335  ComputePtrOff();
6336 
6337  assert(HasParameterArea &&
6338  "Parameter area must exist to pass an argument in memory.");
6339  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6340  true, CFlags.IsTailCall, false, MemOpChains,
6341  TailCallArguments, dl);
6342  if (IsFastCall)
6343  ArgOffset += PtrByteSize;
6344  }
6345  if (!IsFastCall)
6346  ArgOffset += PtrByteSize;
6347  break;
6348  case MVT::f32:
6349  case MVT::f64: {
6350  // These can be scalar arguments or elements of a float array type
6351  // passed directly. The latter are used to implement ELFv2 homogenous
6352  // float aggregates.
6353 
6354  // Named arguments go into FPRs first, and once they overflow, the
6355  // remaining arguments go into GPRs and then the parameter save area.
6356  // Unnamed arguments for vararg functions always go to GPRs and
6357  // then the parameter save area. For now, put all arguments to vararg
6358  // routines always in both locations (FPR *and* GPR or stack slot).
6359  bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6360  bool NeededLoad = false;
6361 
6362  // First load the argument into the next available FPR.
6363  if (FPR_idx != NumFPRs)
6364  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6365 
6366  // Next, load the argument into GPR or stack slot if needed.
6367  if (!NeedGPROrStack)
6368  ;
6369  else if (GPR_idx != NumGPRs && !IsFastCall) {
6370  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6371  // once we support fp <-> gpr moves.
6372 
6373  // In the non-vararg case, this can only ever happen in the
6374  // presence of f32 array types, since otherwise we never run
6375  // out of FPRs before running out of GPRs.
6376  SDValue ArgVal;
6377 
6378  // Double values are always passed in a single GPR.
6379  if (Arg.getValueType() != MVT::f32) {
6380  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6381 
6382  // Non-array float values are extended and passed in a GPR.
6383  } else if (!Flags.isInConsecutiveRegs()) {
6384  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6385  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6386 
6387  // If we have an array of floats, we collect every odd element
6388  // together with its predecessor into one GPR.
6389  } else if (ArgOffset % PtrByteSize != 0) {
6390  SDValue Lo, Hi;
6391  Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6392  Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6393  if (!isLittleEndian)
6394  std::swap(Lo, Hi);
6395  ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6396 
6397  // The final element, if even, goes into the first half of a GPR.
6398  } else if (Flags.isInConsecutiveRegsLast()) {
6399  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6400  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6401  if (!isLittleEndian)
6402  ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6403  DAG.getConstant(32, dl, MVT::i32));
6404 
6405  // Non-final even elements are skipped; they will be handled
6406  // together the with subsequent argument on the next go-around.
6407  } else
6408  ArgVal = SDValue();
6409 
6410  if (ArgVal.getNode())
6411  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6412  } else {
6413  if (IsFastCall)
6414  ComputePtrOff();
6415 
6416  // Single-precision floating-point values are mapped to the
6417  // second (rightmost) word of the stack doubleword.
6418  if (Arg.getValueType() == MVT::f32 &&
6419  !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6420  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6421  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6422  }
6423 
6424  assert(HasParameterArea &&
6425  "Parameter area must exist to pass an argument in memory.");
6426  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6427  true, CFlags.IsTailCall, false, MemOpChains,
6428  TailCallArguments, dl);
6429 
6430  NeededLoad = true;
6431  }
6432  // When passing an array of floats, the array occupies consecutive
6433  // space in the argument area; only round up to the next doubleword
6434  // at the end of the array. Otherwise, each float takes 8 bytes.
6435  if (!IsFastCall || NeededLoad) {
6436  ArgOffset += (Arg.getValueType() == MVT::f32 &&
6437  Flags.isInConsecutiveRegs()) ? 4 : 8;
6438  if (Flags.isInConsecutiveRegsLast())
6439  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6440  }
6441  break;
6442  }
6443  case MVT::v4f32:
6444  case MVT::v4i32:
6445  case MVT::v8i16:
6446  case MVT::v16i8:
6447  case MVT::v2f64:
6448  case MVT::v2i64:
6449  case MVT::v1i128:
6450  case MVT::f128:
6451  // These can be scalar arguments or elements of a vector array type
6452  // passed directly. The latter are used to implement ELFv2 homogenous
6453  // vector aggregates.
6454 
6455  // For a varargs call, named arguments go into VRs or on the stack as
6456  // usual; unnamed arguments always go to the stack or the corresponding
6457  // GPRs when within range. For now, we always put the value in both
6458  // locations (or even all three).
6459  if (CFlags.IsVarArg) {
6460  assert(HasParameterArea &&
6461  "Parameter area must exist if we have a varargs call.");
6462  // We could elide this store in the case where the object fits
6463  // entirely in R registers. Maybe later.
6464  SDValue Store =
6465  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6466  MemOpChains.push_back(Store);
6467  if (VR_idx != NumVRs) {
6468  SDValue Load =
6469  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6470  MemOpChains.push_back(Load.getValue(1));
6471  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6472  }
6473  ArgOffset += 16;
6474  for (unsigned i=0; i<16; i+=PtrByteSize) {
6475  if (GPR_idx == NumGPRs)
6476  break;
6477  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6478  DAG.getConstant(i, dl, PtrVT));
6479  SDValue Load =
6480  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6481  MemOpChains.push_back(Load.getValue(1));
6482  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6483  }
6484  break;
6485  }
6486 
6487  // Non-varargs Altivec params go into VRs or on the stack.
6488  if (VR_idx != NumVRs) {
6489  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6490  } else {
6491  if (IsFastCall)
6492  ComputePtrOff();
6493 
6494  assert(HasParameterArea &&
6495  "Parameter area must exist to pass an argument in memory.");
6496  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6497  true, CFlags.IsTailCall, true, MemOpChains,
6498  TailCallArguments, dl);
6499  if (IsFastCall)
6500  ArgOffset += 16;
6501  }
6502 
6503  if (!IsFastCall)
6504  ArgOffset += 16;
6505  break;
6506  }
6507  }
6508 
6509  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6510  "mismatch in size of parameter area");
6511  (void)NumBytesActuallyUsed;
6512 
6513  if (!MemOpChains.empty())
6514  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6515 
6516  // Check if this is an indirect call (MTCTR/BCTRL).
6517  // See prepareDescriptorIndirectCall and buildCallOperands for more
6518  // information about calls through function pointers in the 64-bit SVR4 ABI.
6519  if (CFlags.IsIndirect) {
6520  // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6521  // caller in the TOC save area.
6522  if (isTOCSaveRestoreRequired(Subtarget)) {
6523  assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6524  // Load r2 into a virtual register and store it to the TOC save area.
6525  setUsesTOCBasePtr(DAG);
6526  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6527  // TOC save area offset.
6528  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6529  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6530  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6531  Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6533  DAG.getMachineFunction(), TOCSaveOffset));
6534  }
6535  // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6536  // This does not mean the MTCTR instruction must use R12; it's easier
6537  // to model this as an extra parameter, so do that.
6538  if (isELFv2ABI && !CFlags.IsPatchPoint)
6539  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6540  }
6541 
6542  // Build a sequence of copy-to-reg nodes chained together with token chain
6543  // and flag operands which copy the outgoing args into the appropriate regs.
6544  SDValue InFlag;
6545  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6546  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6547  RegsToPass[i].second, InFlag);
6548  InFlag = Chain.getValue(1);
6549  }
6550 
6551  if (CFlags.IsTailCall && !IsSibCall)
6552  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6553  TailCallArguments);
6554 
6555  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6556  Callee, SPDiff, NumBytes, Ins, InVals, CB);
6557 }
6558 
6559 // Returns true when the shadow of a general purpose argument register
6560 // in the parameter save area is aligned to at least 'RequiredAlign'.
6561 static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6562  assert(RequiredAlign.value() <= 16 &&
6563  "Required alignment greater than stack alignment.");
6564  switch (Reg) {
6565  default:
6566  report_fatal_error("called on invalid register.");
6567  case PPC::R5:
6568  case PPC::R9:
6569  case PPC::X3:
6570  case PPC::X5:
6571  case PPC::X7:
6572  case PPC::X9:
6573  // These registers are 16 byte aligned which is the most strict aligment
6574  // we can support.
6575  return true;
6576  case PPC::R3:
6577  case PPC::R7:
6578  case PPC::X4:
6579  case PPC::X6:
6580  case PPC::X8:
6581  case PPC::X10:
6582  // The shadow of these registers in the PSA is 8 byte aligned.
6583  return RequiredAlign <= 8;
6584  case PPC::R4:
6585  case PPC::R6:
6586  case PPC::R8:
6587  case PPC::R10:
6588  return RequiredAlign <= 4;
6589  }
6590 }
6591 
6592 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6593  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6594  CCState &S) {
6595  AIXCCState &State = static_cast<AIXCCState &>(S);
6596  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6597  State.getMachineFunction().getSubtarget());
6598  const bool IsPPC64 = Subtarget.isPPC64();
6599  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6600  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6601 
6602  if (ValVT == MVT::f128)
6603  report_fatal_error("f128 is unimplemented on AIX.");
6604 
6605  if (ArgFlags.isNest())
6606  report_fatal_error("Nest arguments are unimplemented.");
6607 
6608  static const MCPhysReg GPR_32[] = {// 32-bit registers.
6609  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6610  PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6611  static const MCPhysReg GPR_64[] = {// 64-bit registers.
6612  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6613  PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6614 
6615  static const MCPhysReg VR[] = {// Vector registers.
6616  PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6617  PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6618  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6619 
6620  if (ArgFlags.isByVal()) {
6621  if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6622  report_fatal_error("Pass-by-value arguments with alignment greater than "
6623  "register width are not supported.");
6624 
6625  const unsigned ByValSize = ArgFlags.getByValSize();
6626 
6627  // An empty aggregate parameter takes up no storage and no registers,
6628  // but needs a MemLoc for a stack slot for the formal arguments side.
6629  if (ByValSize == 0) {
6631  State.getNextStackOffset(), RegVT,
6632  LocInfo));
6633  return false;
6634  }
6635 
6636  const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6637  unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6638  for (const unsigned E = Offset + StackSize; Offset < E;
6639  Offset += PtrAlign.value()) {
6640  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6641  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6642  else {
6645  LocInfo));
6646  break;
6647  }
6648  }
6649  return false;
6650  }
6651 
6652  // Arguments always reserve parameter save area.
6653  switch (ValVT.SimpleTy) {
6654  default:
6655  report_fatal_error("Unhandled value type for argument.");
6656  case MVT::i64:
6657  // i64 arguments should have been split to i32 for PPC32.
6658  assert(IsPPC64 && "PPC32 should have split i64 values.");
6659  [[fallthrough]];
6660  case MVT::i1:
6661  case MVT::i32: {
6662  const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6663  // AIX integer arguments are always passed in register width.
6664  if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6665  LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6666  : CCValAssign::LocInfo::ZExt;
6667  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6668  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6669  else
6670  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6671 
6672  return false;
6673  }
6674  case MVT::f32:
6675  case MVT::f64: {
6676  // Parameter save area (PSA) is reserved even if the float passes in fpr.
6677  const unsigned StoreSize = LocVT.getStoreSize();
6678  // Floats are always 4-byte aligned in the PSA on AIX.
6679  // This includes f64 in 64-bit mode for ABI compatibility.
6680  const unsigned Offset =
6681  State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6682  unsigned FReg = State.AllocateReg(FPR);
6683  if (FReg)
6684  State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6685 
6686  // Reserve and initialize GPRs or initialize the PSA as required.
6687  for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6688  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6689  assert(FReg && "An FPR should be available when a GPR is reserved.");
6690  if (State.isVarArg()) {
6691  // Successfully reserved GPRs are only initialized for vararg calls.
6692  // Custom handling is required for:
6693  // f64 in PPC32 needs to be split into 2 GPRs.
6694  // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6695  State.addLoc(
6696  CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6697  }
6698  } else {
6699  // If there are insufficient GPRs, the PSA needs to be initialized.
6700  // Initialization occurs even if an FPR was initialized for
6701  // compatibility with the AIX XL compiler. The full memory for the
6702  // argument will be initialized even if a prior word is saved in GPR.
6703  // A custom memLoc is used when the argument also passes in FPR so
6704  // that the callee handling can skip over it easily.
6705  State.addLoc(
6706  FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6707  LocInfo)
6708  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6709  break;
6710  }
6711  }
6712 
6713  return false;
6714  }
6715  case MVT::v4f32:
6716  case MVT::v4i32:
6717  case MVT::v8i16:
6718  case MVT::v16i8:
6719  case MVT::v2i64:
6720  case MVT::v2f64:
6721  case MVT::v1i128: {
6722  const unsigned VecSize = 16;
6723  const Align VecAlign(VecSize);
6724 
6725  if (!State.isVarArg()) {
6726  // If there are vector registers remaining we don't consume any stack
6727  // space.
6728  if (unsigned VReg = State.AllocateReg(VR)) {
6729  State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6730  return false;
6731  }
6732  // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6733  // might be allocated in the portion of the PSA that is shadowed by the
6734  // GPRs.
6735  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6736  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6737  return false;
6738  }
6739 
6740  const unsigned PtrSize = IsPPC64 ? 8 : 4;
6741  ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6742 
6743  unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6744  // Burn any underaligned registers and their shadowed stack space until
6745  // we reach the required alignment.
6746  while (NextRegIndex != GPRs.size() &&
6747  !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6748  // Shadow allocate register and its stack shadow.
6749  unsigned Reg = State.AllocateReg(GPRs);
6750  State.AllocateStack(PtrSize, PtrAlign);
6751  assert(Reg && "Allocating register unexpectedly failed.");
6752  (void)Reg;
6753  NextRegIndex = State.getFirstUnallocated(GPRs);
6754  }
6755 
6756  // Vectors that are passed as fixed arguments are handled differently.
6757  // They are passed in VRs if any are available (unlike arguments passed
6758  // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6759  // functions)
6760  if (State.isFixed(ValNo)) {
6761  if (unsigned VReg = State.AllocateReg(VR)) {
6762  State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6763  // Shadow allocate GPRs and stack space even though we pass in a VR.
6764  for (unsigned I = 0; I != VecSize; I += PtrSize)
6765  State.AllocateReg(GPRs);
6766  State.AllocateStack(VecSize, VecAlign);
6767  return false;
6768  }
6769  // No vector registers remain so pass on the stack.
6770  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6771  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6772  return false;
6773  }
6774 
6775  // If all GPRS are consumed then we pass the argument fully on the stack.
6776  if (NextRegIndex == GPRs.size()) {
6777  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6778  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6779  return false;
6780  }
6781 
6782  // Corner case for 32-bit codegen. We have 2 registers to pass the first
6783  // half of the argument, and then need to pass the remaining half on the
6784  // stack.
6785  if (GPRs[NextRegIndex] == PPC::R9) {
6786  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6787  State.addLoc(
6788  CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6789 
6790  const unsigned FirstReg = State.AllocateReg(PPC::R9);
6791  const unsigned SecondReg = State.AllocateReg(PPC::R10);
6792  assert(FirstReg && SecondReg &&
6793  "Allocating R9 or R10 unexpectedly failed.");
6794  State.addLoc(
6795  CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6796  State.addLoc(
6797  CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6798  return false;
6799  }
6800 
6801  // We have enough GPRs to fully pass the vector argument, and we have
6802  // already consumed any underaligned registers. Start with the custom
6803  // MemLoc and then the custom RegLocs.
6804  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6805  State.addLoc(
6806  CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6807  for (unsigned I = 0; I != VecSize; I += PtrSize) {
6808  const unsigned Reg = State.AllocateReg(GPRs);
6809  assert(Reg && "Failed to allocated register for vararg vector argument");
6810  State.addLoc(
6811  CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6812  }
6813  return false;
6814  }
6815  }
6816  return true;
6817 }
6818 
6819 // So far, this function is only used by LowerFormalArguments_AIX()
6821  bool IsPPC64,
6822  bool HasP8Vector,
6823  bool HasVSX) {
6824  assert((IsPPC64 || SVT != MVT::i64) &&
6825  "i64 should have been split for 32-bit codegen.");
6826 
6827  switch (SVT) {
6828  default:
6829  report_fatal_error("Unexpected value type for formal argument");
6830  case MVT::i1:
6831  case MVT::i32:
6832  case MVT::i64:
6833  return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6834  case MVT::f32:
6835  return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6836  case MVT::f64:
6837  return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6838  case MVT::v4f32:
6839  case MVT::v4i32:
6840  case MVT::v8i16:
6841  case MVT::v16i8:
6842  case MVT::v2i64:
6843  case MVT::v2f64:
6844  case MVT::v1i128:
6845  return &PPC::VRRCRegClass;
6846  }
6847 }
6848 
6850  SelectionDAG &DAG, SDValue ArgValue,
6851  MVT LocVT, const SDLoc &dl) {
6852  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6853  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6854 
6855  if (Flags.isSExt())
6856  ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6857  DAG.getValueType(ValVT));
6858  else if (Flags.isZExt())
6859  ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6860  DAG.getValueType(ValVT));
6861 
6862  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6863 }
6864 
6865 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6866  const unsigned LASize = FL->getLinkageSize();
6867 
6868  if (PPC::GPRCRegClass.contains(Reg)) {
6869  assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6870  "Reg must be a valid argument register!");
6871  return LASize + 4 * (Reg - PPC::R3);
6872  }
6873 
6874  if (PPC::G8RCRegClass.contains(Reg)) {
6875  assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6876  "Reg must be a valid argument register!");
6877  return LASize + 8 * (Reg - PPC::X3);
6878  }
6879 
6880  llvm_unreachable("Only general purpose registers expected.");
6881 }
6882 
6883 // AIX ABI Stack Frame Layout:
6884 //
6885 // Low Memory +--------------------------------------------+
6886 // SP +---> | Back chain | ---+
6887 // | +--------------------------------------------+ |
6888 // | | Saved Condition Register | |
6889 // | +--------------------------------------------+ |
6890 // | | Saved Linkage Register | |
6891 // | +--------------------------------------------+ | Linkage Area
6892 // | | Reserved for compilers | |
6893 // | +--------------------------------------------+ |
6894 // | | Reserved for binders | |
6895 // | +--------------------------------------------+ |
6896 // | | Saved TOC pointer | ---+
6897 // | +--------------------------------------------+
6898 // | | Parameter save area |
6899 // | +--------------------------------------------+
6900 // | | Alloca space |
6901 // | +--------------------------------------------+
6902 // | | Local variable space |
6903 // | +--------------------------------------------+
6904 // | | Float/int conversion temporary |
6905 // | +--------------------------------------------+
6906 // | | Save area for AltiVec registers |
6907 // | +--------------------------------------------+
6908 // | | AltiVec alignment padding |
6909 // | +--------------------------------------------+
6910 // | | Save area for VRSAVE register |
6911 // | +--------------------------------------------+
6912 // | | Save area for General Purpose registers |
6913 // | +--------------------------------------------+
6914 // | | Save area for Floating Point registers |
6915 // | +--------------------------------------------+
6916 // +---- | Back chain |
6917 // High Memory +--------------------------------------------+
6918 //
6919 // Specifications:
6920 // AIX 7.2 Assembler Language Reference
6921 // Subroutine linkage convention
6922 
6923 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6924  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6925  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6926  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6927 
6928  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6929  CallConv == CallingConv::Fast) &&
6930  "Unexpected calling convention!");
6931 
6932  if (getTargetMachine().Options.GuaranteedTailCallOpt)
6933  report_fatal_error("Tail call support is unimplemented on AIX.");
6934 
6935  if (useSoftFloat())
6936  report_fatal_error("Soft float support is unimplemented on AIX.");
6937 
6938  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
6939 
6940  const bool IsPPC64 = Subtarget.isPPC64();
6941  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6942 
6943  // Assign locations to all of the incoming arguments.
6945  MachineFunction &MF = DAG.getMachineFunction();
6946  MachineFrameInfo &MFI = MF.getFrameInfo();
6947  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6948  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6949 
6950  const EVT PtrVT = getPointerTy(MF.getDataLayout());
6951  // Reserve space for the linkage area on the stack.
6952  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6953  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6954  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6955 
6956  SmallVector<SDValue, 8> MemOps;
6957 
6958  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6959  CCValAssign &VA = ArgLocs[I++];
6960  MVT LocVT = VA.getLocVT();
6961  MVT ValVT = VA.getValVT();
6962  ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6963  // For compatibility with the AIX XL compiler, the float args in the
6964  // parameter save area are initialized even if the argument is available
6965  // in register. The caller is required to initialize both the register
6966  // and memory, however, the callee can choose to expect it in either.
6967  // The memloc is dismissed here because the argument is retrieved from
6968  // the register.
6969  if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
6970  continue;
6971 
6972  auto HandleMemLoc = [&]() {
6973  const unsigned LocSize = LocVT.getStoreSize();
6974  const unsigned ValSize = ValVT.getStoreSize();
6975  assert((ValSize <= LocSize) &&
6976  "Object size is larger than size of MemLoc");
6977  int CurArgOffset = VA.getLocMemOffset();
6978  // Objects are right-justified because AIX is big-endian.
6979  if (LocSize > ValSize)
6980  CurArgOffset += LocSize - ValSize;
6981  // Potential tail calls could cause overwriting of argument stack slots.
6982  const bool IsImmutable =
6984  (CallConv == CallingConv::Fast));
6985  int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6986  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6987  SDValue ArgValue =
6988  DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6989  InVals.push_back(ArgValue);
6990  };
6991 
6992  // Vector arguments to VaArg functions are passed both on the stack, and
6993  // in any available GPRs. Load the value from the stack and add the GPRs
6994  // as live ins.
6995  if (VA.isMemLoc() && VA.needsCustom()) {
6996  assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
6997  assert(isVarArg && "Only use custom memloc for vararg.");
6998  // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
6999  // matching custom RegLocs.
7000  const unsigned OriginalValNo = VA.getValNo();
7001  (void)OriginalValNo;
7002 
7003  auto HandleCustomVecRegLoc = [&]() {
7004  assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7005  "Missing custom RegLoc.");
7006  VA = ArgLocs[I++];
7007  assert(VA.getValVT().isVector() &&
7008  "Unexpected Val type for custom RegLoc.");
7009  assert(VA.getValNo() == OriginalValNo &&
7010  "ValNo mismatch between custom MemLoc and RegLoc.");
7012  MF.addLiveIn(VA.getLocReg(),
7013  getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7014  Subtarget.hasVSX()));
7015  };
7016 
7017  HandleMemLoc();
7018  // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7019  // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7020  // R10.
7021  HandleCustomVecRegLoc();
7022  HandleCustomVecRegLoc();
7023 
7024  // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7025  // we passed the vector in R5, R6, R7 and R8.
7026  if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7027  assert(!IsPPC64 &&
7028  "Only 2 custom RegLocs expected for 64-bit codegen.");
7029  HandleCustomVecRegLoc();
7030  HandleCustomVecRegLoc();
7031  }
7032 
7033  continue;
7034  }
7035 
7036  if (VA.isRegLoc()) {
7037  if (VA.getValVT().isScalarInteger())
7039  else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7040  switch (VA.getValVT().SimpleTy) {
7041  default:
7042  report_fatal_error("Unhandled value type for argument.");
7043  case MVT::f32:
7045  break;
7046  case MVT::f64:
7048  break;
7049  }
7050  } else if (VA.getValVT().isVector()) {
7051  switch (VA.getValVT().SimpleTy) {
7052  default:
7053  report_fatal_error("Unhandled value type for argument.");
7054  case MVT::v16i8:
7056  break;
7057  case MVT::v8i16:
7059  break;
7060  case MVT::v4i32:
7061  case MVT::v2i64:
7062  case MVT::v1i128:
7064  break;
7065  case MVT::v4f32:
7066  case MVT::v2f64:
7068  break;
7069  }
7070  }
7071  }
7072 
7073  if (Flags.isByVal() && VA.isMemLoc()) {
7074  const unsigned Size =
7075  alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7076  PtrByteSize);
7077  const int FI = MF.getFrameInfo().CreateFixedObject(
7078  Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7079  /* IsAliased */ true);
7080  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7081  InVals.push_back(FIN);
7082 
7083  continue;
7084  }
7085 
7086  if (Flags.isByVal()) {
7087  assert(VA.isRegLoc() && "MemLocs should already be handled.");
7088 
7089  const MCPhysReg ArgReg = VA.getLocReg();
7090  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7091 
7092  if (Flags.getNonZeroByValAlign() > PtrByteSize)
7093  report_fatal_error("Over aligned byvals not supported yet.");
7094 
7095  const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7096  const int FI = MF.getFrameInfo().CreateFixedObject(
7097  StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7098  /* IsAliased */ true);
7099  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7100  InVals.push_back(FIN);
7101 
7102  // Add live ins for all the RegLocs for the same ByVal.
7103  const TargetRegisterClass *RegClass =
7104  IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7105 
7106  auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7107  unsigned Offset) {
7108  const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7109  // Since the callers side has left justified the aggregate in the
7110  // register, we can simply store the entire register into the stack
7111  // slot.
7112  SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7113  // The store to the fixedstack object is needed becuase accessing a
7114  // field of the ByVal will use a gep and load. Ideally we will optimize
7115  // to extracting the value from the register directly, and elide the
7116  // stores when the arguments address is not taken, but that will need to
7117  // be future work.
7118  SDValue Store = DAG.getStore(
7119  CopyFrom.getValue(1), dl, CopyFrom,
7120  DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7122 
7123  MemOps.push_back(Store);
7124  };
7125 
7126  unsigned Offset = 0;
7127  HandleRegLoc(VA.getLocReg(), Offset);
7128  Offset += PtrByteSize;
7129  for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7130  Offset += PtrByteSize) {
7131  assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7132  "RegLocs should be for ByVal argument.");
7133 
7134  const CCValAssign RL = ArgLocs[I++];
7135  HandleRegLoc(RL.getLocReg(), Offset);
7137  }
7138 
7139  if (Offset != StackSize) {
7140  assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7141  "Expected MemLoc for remaining bytes.");
7142  assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7143  // Consume the MemLoc.The InVal has already been emitted, so nothing
7144  // more needs to be done.
7145  ++I;
7146  }
7147 
7148  continue;
7149  }
7150 
7151  if (VA.isRegLoc() && !VA.needsCustom()) {
7152  MVT::SimpleValueType SVT = ValVT.SimpleTy;
7153  Register VReg =
7154  MF.addLiveIn(VA.getLocReg(),
7155  getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7156  Subtarget.hasVSX()));
7157  SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7158  if (ValVT.isScalarInteger() &&
7159  (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7160  ArgValue =
7161  truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7162  }
7163  InVals.push_back(ArgValue);
7164  continue;
7165  }
7166  if (VA.isMemLoc()) {
7167  HandleMemLoc();
7168  continue;
7169  }
7170  }
7171 
7172  // On AIX a minimum of 8 words is saved to the parameter save area.
7173  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7174  // Area that is at least reserved in the caller of this function.
7175  unsigned CallerReservedArea =
7176  std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7177 
7178  // Set the size that is at least reserved in caller of this function. Tail
7179  // call optimized function's reserved stack space needs to be aligned so
7180  // that taking the difference between two stack areas will result in an
7181  // aligned stack.
7182  CallerReservedArea =
7183  EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7184  FuncInfo->setMinReservedArea(CallerReservedArea);
7185 
7186  if (isVarArg) {
7187  FuncInfo->setVarArgsFrameIndex(
7188  MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7189  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7190 
7191  static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7192  PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7193 
7194  static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7195  PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7196  const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7197 
7198  // The fixed integer arguments of a variadic function are stored to the
7199  // VarArgsFrameIndex on the stack so that they may be loaded by
7200  // dereferencing the result of va_next.
7201  for (unsigned GPRIndex =
7202  (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7203  GPRIndex < NumGPArgRegs; ++GPRIndex) {
7204 
7205  const Register VReg =
7206  IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7207  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7208 
7209  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7210  SDValue Store =
7211  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7212  MemOps.push_back(Store);
7213  // Increment the address for the next argument to store.
7214  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7215  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7216  }
7217  }
7218 
7219  if (!MemOps.empty())
7220  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7221 
7222  return Chain;
7223 }
7224 
7225 SDValue PPCTargetLowering::LowerCall_AIX(
7226  SDValue Chain, SDValue Callee, CallFlags CFlags,
7227  const SmallVectorImpl<ISD::OutputArg> &Outs,
7228  const SmallVectorImpl<SDValue> &OutVals,
7229  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7230  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7231  const CallBase *CB) const {
7232  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7233  // AIX ABI stack frame layout.
7234 
7235  assert((CFlags.CallConv == CallingConv::C ||
7236  CFlags.CallConv == CallingConv::Cold ||
7237  CFlags.CallConv == CallingConv::Fast) &&
7238  "Unexpected calling convention!");
7239 
7240  if (CFlags.IsPatchPoint)
7241  report_fatal_error("This call type is unimplemented on AIX.");
7242 
7243  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7244 
7245  MachineFunction &MF = DAG.getMachineFunction();
7247  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7248  *DAG.getContext());
7249 
7250  // Reserve space for the linkage save area (LSA) on the stack.
7251  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7252  // [SP][CR][LR][2 x reserved][TOC].
7253  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7254  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7255  const bool IsPPC64 = Subtarget.isPPC64();
7256  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7257  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7258  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7259  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7260 
7261  // The prolog code of the callee may store up to 8 GPR argument registers to
7262  // the stack, allowing va_start to index over them in memory if the callee
7263  // is variadic.
7264  // Because we cannot tell if this is needed on the caller side, we have to
7265  // conservatively assume that it is needed. As such, make sure we have at
7266  // least enough stack space for the caller to store the 8 GPRs.
7267  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7268  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7269  CCInfo.getNextStackOffset());
7270 
7271  // Adjust the stack pointer for the new arguments...
7272  // These operations are automatically eliminated by the prolog/epilog pass.
7273  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7274  SDValue CallSeqStart = Chain;
7275 
7277  SmallVector<SDValue, 8> MemOpChains;
7278 
7279  // Set up a copy of the stack pointer for loading and storing any
7280  // arguments that may not fit in the registers available for argument
7281  // passing.
7282  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7283  : DAG.getRegister(PPC::R1, MVT::i32);
7284 
7285  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7286  const unsigned ValNo = ArgLocs[I].getValNo();
7287  SDValue Arg = OutVals[ValNo];
7288  ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7289 
7290  if (Flags.isByVal()) {
7291  const unsigned ByValSize = Flags.getByValSize();
7292 
7293  // Nothing to do for zero-sized ByVals on the caller side.
7294  if (!ByValSize) {
7295  ++I;
7296  continue;
7297  }
7298 
7299  auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7300  return DAG.getExtLoad(
7301  ISD::ZEXTLOAD, dl, PtrVT, Chain,
7302  (LoadOffset != 0)
7303  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7304  : Arg,
7305  MachinePointerInfo(), VT);
7306  };
7307 
7308  unsigned LoadOffset = 0;
7309 
7310  // Initialize registers, which are fully occupied by the by-val argument.
7311  while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7312  SDValue Load = GetLoad(PtrVT, LoadOffset);
7313  MemOpChains.push_back(Load.getValue(1));
7314  LoadOffset += PtrByteSize;
7315  const CCValAssign &ByValVA = ArgLocs[I++];
7316  assert(ByValVA.getValNo() == ValNo &&
7317  "Unexpected location for pass-by-value argument.");
7318  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7319  }
7320 
7321  if (LoadOffset == ByValSize)
7322  continue;
7323 
7324  // There must be one more loc to handle the remainder.
7325  assert(ArgLocs[I].getValNo() == ValNo &&
7326  "Expected additional location for by-value argument.");
7327 
7328  if (ArgLocs[I].isMemLoc()) {
7329  assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7330  const CCValAssign &ByValVA = ArgLocs[I++];
7331  ISD::ArgFlagsTy MemcpyFlags = Flags;
7332  // Only memcpy the bytes that don't pass in register.
7333  MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7334  Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7335  (LoadOffset != 0)
7336  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7337  : Arg,
7338  DAG.getObjectPtrOffset(dl, StackPtr,
7339  TypeSize::Fixed(ByValVA.getLocMemOffset())),
7340  CallSeqStart, MemcpyFlags, DAG, dl);
7341  continue;
7342  }
7343 
7344  // Initialize the final register residue.
7345  // Any residue that occupies the final by-val arg register must be
7346  // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7347  // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7348  // 2 and 1 byte loads.
7349  const unsigned ResidueBytes = ByValSize % PtrByteSize;
7350  assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7351  "Unexpected register residue for by-value argument.");
7352  SDValue ResidueVal;
7353  for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7354  const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7355  const MVT VT =
7356  N == 1 ? MVT::i8
7357  : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7358  SDValue Load = GetLoad(VT, LoadOffset);
7359  MemOpChains.push_back(Load.getValue(1));
7360  LoadOffset += N;
7361  Bytes += N;
7362 
7363  // By-val arguments are passed left-justfied in register.
7364  // Every load here needs to be shifted, otherwise a full register load
7365  // should have been used.
7366  assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7367  "Unexpected load emitted during handling of pass-by-value "
7368  "argument.");
7369  unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7370  EVT ShiftAmountTy =
7371  getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7372  SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7373  SDValue ShiftedLoad =
7374  DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7375  ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7376  ShiftedLoad)
7377  : ShiftedLoad;
7378  }
7379 
7380  const CCValAssign &ByValVA = ArgLocs[I++];
7381  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7382  continue;
7383  }
7384 
7385  CCValAssign &VA = ArgLocs[I++];
7386  const MVT LocVT = VA.getLocVT();
7387  const MVT ValVT = VA.getValVT();
7388 
7389  switch (VA.getLocInfo()) {
7390  default:
7391  report_fatal_error("Unexpected argument extension type.");
7392  case CCValAssign::Full:
7393  break;
7394  case CCValAssign::ZExt:
7395  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7396  break;
7397  case CCValAssign::SExt:
7398  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7399  break;
7400  }
7401 
7402  if (VA.isRegLoc() && !VA.needsCustom()) {
7403  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7404  continue;
7405  }
7406 
7407  // Vector arguments passed to VarArg functions need custom handling when
7408  // they are passed (at least partially) in GPRs.
7409  if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7410  assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7411  // Store value to its stack slot.
7412  SDValue PtrOff =
7413  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7414  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7415  SDValue Store =
7416  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7417  MemOpChains.push_back(Store);
7418  const unsigned OriginalValNo = VA.getValNo();
7419  // Then load the GPRs from the stack
7420  unsigned LoadOffset = 0;
7421  auto HandleCustomVecRegLoc = [&]() {
7422  assert(I != E && "Unexpected end of CCvalAssigns.");
7423  assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7424  "Expected custom RegLoc.");
7425  CCValAssign RegVA = ArgLocs[I++];
7426  assert(RegVA.getValNo() == OriginalValNo &&
7427  "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7428  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7429  DAG.getConstant(LoadOffset, dl, PtrVT));
7430  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7431  MemOpChains.push_back(Load.getValue(1));
7432  RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7433  LoadOffset += PtrByteSize;
7434  };
7435 
7436  // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7437  // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7438  // R10.
7439  HandleCustomVecRegLoc();
7440  HandleCustomVecRegLoc();
7441 
7442  if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7443  ArgLocs[I].getValNo() == OriginalValNo) {
7444  assert(!IsPPC64 &&
7445  "Only 2 custom RegLocs expected for 64-bit codegen.");
7446  HandleCustomVecRegLoc();
7447  HandleCustomVecRegLoc();
7448  }
7449 
7450  continue;
7451  }
7452 
7453  if (VA.isMemLoc()) {
7454  SDValue PtrOff =
7455  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7456  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7457  MemOpChains.push_back(
7458  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7459 
7460  continue;
7461  }
7462 
7463  if (!ValVT.isFloatingPoint())
7465  "Unexpected register handling for calling convention.");
7466 
7467  // Custom handling is used for GPR initializations for vararg float
7468  // arguments.
7469  assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7470  LocVT.isInteger() &&
7471  "Custom register handling only expected for VarArg.");
7472 
7473  SDValue ArgAsInt =
7475 
7476  if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7477  // f32 in 32-bit GPR
7478  // f64 in 64-bit GPR
7479  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7480  else if (Arg.getValueType().getFixedSizeInBits() <
7481  LocVT.getFixedSizeInBits())
7482  // f32 in 64-bit GPR.
7483  RegsToPass.push_back(std::make_pair(
7484  VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7485  else {
7486  // f64 in two 32-bit GPRs
7487  // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7488  assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7489  "Unexpected custom register for argument!");
7490  CCValAssign &GPR1 = VA;
7491  SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7492  DAG.getConstant(32, dl, MVT::i8));
7493  RegsToPass.push_back(std::make_pair(
7494  GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7495 
7496  if (I != E) {
7497  // If only 1 GPR was available, there will only be one custom GPR and
7498  // the argument will also pass in memory.
7499  CCValAssign &PeekArg = ArgLocs[I];
7500  if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7501  assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7502  CCValAssign &GPR2 = ArgLocs[I++];
7503  RegsToPass.push_back(std::make_pair(
7504  GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7505  }
7506  }
7507  }
7508  }
7509 
7510  if (!MemOpChains.empty())
7511  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7512 
7513  // For indirect calls, we need to save the TOC base to the stack for
7514  // restoration after the call.
7515  if (CFlags.IsIndirect) {
7516  assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7517  const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7518  const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7519  const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7520  const unsigned TOCSaveOffset =
7521  Subtarget.getFrameLowering()->getTOCSaveOffset();
7522 
7523  setUsesTOCBasePtr(DAG);
7524  SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7525  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7526  SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7527  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7528  Chain = DAG.getStore(
7529  Val.getValue(1), dl, Val, AddPtr,
7530  MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7531  }
7532 
7533  // Build a sequence of copy-to-reg nodes chained together with token chain
7534  // and flag operands which copy the outgoing args into the appropriate regs.
7535  SDValue InFlag;
7536  for (auto Reg : RegsToPass) {
7537  Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7538  InFlag = Chain.getValue(1);
7539  }
7540 
7541  const int SPDiff = 0;
7542  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7543  Callee, SPDiff, NumBytes, Ins, InVals, CB);
7544 }
7545 
7546 bool
7547 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7548  MachineFunction &MF, bool isVarArg,
7549  const SmallVectorImpl<ISD::OutputArg> &Outs,
7550  LLVMContext &Context) const {
7552  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7553  return CCInfo.CheckReturn(
7554  Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7555  ? RetCC_PPC_Cold
7556  : RetCC_PPC);
7557 }
7558 
7559 SDValue
7560 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7561  bool isVarArg,
7562  const SmallVectorImpl<ISD::OutputArg> &Outs,
7563  const SmallVectorImpl<SDValue> &OutVals,
7564  const SDLoc &dl, SelectionDAG &DAG) const {
7566  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7567  *DAG.getContext());
7568  CCInfo.AnalyzeReturn(Outs,
7569  (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7570  ? RetCC_PPC_Cold
7571  : RetCC_PPC);
7572 
7573  SDValue Flag;
7574  SmallVector<SDValue, 4> RetOps(1, Chain);
7575 
7576  // Copy the result values into the output registers.
7577  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7578  CCValAssign &VA = RVLocs[i];
7579  assert(VA.isRegLoc() && "Can only return in registers!");
7580 
7581  SDValue Arg = OutVals[RealResIdx];
7582 
7583  switch (VA.getLocInfo()) {
7584  default: llvm_unreachable("Unknown loc info!");
7585  case CCValAssign::Full: break;
7586  case CCValAssign::AExt:
7587  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7588  break;
7589  case CCValAssign::ZExt:
7590  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7591  break;
7592  case CCValAssign::SExt:
7593  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7594  break;
7595  }
7596  if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7597  bool isLittleEndian = Subtarget.isLittleEndian();
7598  // Legalize ret f64 -> ret 2 x i32.
7599  SDValue SVal =
7601  DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7602  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7603  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7604  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7605  DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7606  Flag = Chain.getValue(1);
7607  VA = RVLocs[++i]; // skip ahead to next loc
7608  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7609  } else
7610  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7611  Flag = Chain.getValue(1);
7612  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7613  }
7614 
7615  RetOps[0] = Chain; // Update chain.
7616 
7617  // Add the flag if we have it.
7618  if (Flag.getNode())
7619  RetOps.push_back(Flag);
7620 
7621  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7622 }
7623 
7624 SDValue
7625 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7626  SelectionDAG &DAG) const {
7627  SDLoc dl(Op);
7628 
7629  // Get the correct type for integers.
7630  EVT IntVT = Op.getValueType();
7631 
7632  // Get the inputs.
7633  SDValue Chain = Op.getOperand(0);
7634  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7635  // Build a DYNAREAOFFSET node.
7636  SDValue Ops[2] = {Chain, FPSIdx};
7637  SDVTList VTs = DAG.getVTList(IntVT);
7638  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7639 }
7640 
7641 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7642  SelectionDAG &DAG) const {
7643  // When we pop the dynamic allocation we need to restore the SP link.
7644  SDLoc dl(Op);
7645 
7646  // Get the correct type for pointers.
7647  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7648 
7649  // Construct the stack pointer operand.
7650  bool isPPC64 = Subtarget.isPPC64();
7651  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7652  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7653 
7654  // Get the operands for the STACKRESTORE.
7655  SDValue Chain = Op.getOperand(0);
7656  SDValue SaveSP = Op.getOperand(1);
7657 
7658  // Load the old link SP.
7659  SDValue LoadLinkSP =
7660  DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7661 
7662  // Restore the stack pointer.
7663  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7664 
7665  // Store the old link SP.
7666  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7667 }
7668 
7669 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7670  MachineFunction &MF = DAG.getMachineFunction();
7671  bool isPPC64 = Subtarget.isPPC64();
7672  EVT PtrVT = getPointerTy(MF.getDataLayout());
7673 
7674  // Get current frame pointer save index. The users of this index will be
7675  // primarily DYNALLOC instructions.
7677  int RASI = FI->getReturnAddrSaveIndex();
7678 
7679  // If the frame pointer save index hasn't been defined yet.
7680  if (!RASI) {
7681  // Find out what the fix offset of the frame pointer save area.
7682  int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7683  // Allocate the frame index for frame pointer save area.
7684  RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7685  // Save the result.
7686  FI->setReturnAddrSaveIndex(RASI);
7687  }
7688  return DAG.getFrameIndex(RASI, PtrVT);
7689 }
7690 
7691 SDValue
7692 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7693  MachineFunction &MF = DAG.getMachineFunction();
7694  bool isPPC64 = Subtarget.isPPC64();
7695  EVT PtrVT = getPointerTy(MF.getDataLayout());
7696 
7697  // Get current frame pointer save index. The users of this index will be
7698  // primarily DYNALLOC instructions.
7700  int FPSI = FI->getFramePointerSaveIndex();
7701 
7702  // If the frame pointer save index hasn't been defined yet.
7703  if (!FPSI) {
7704  // Find out what the fix offset of the frame pointer save area.
7705  int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7706  // Allocate the frame index for frame pointer save area.
7707  FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7708  // Save the result.
7709  FI->setFramePointerSaveIndex(FPSI);
7710  }
7711  return DAG.getFrameIndex(FPSI, PtrVT);
7712 }
7713 
7714 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7715  SelectionDAG &DAG) const {
7716  MachineFunction &MF = DAG.getMachineFunction();
7717  // Get the inputs.
7718  SDValue Chain = Op.getOperand(0);
7719  SDValue Size = Op.getOperand(1);
7720  SDLoc dl(Op);
7721 
7722  // Get the correct type for pointers.
7723  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7724  // Negate the size.
7725  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7726  DAG.getConstant(0, dl, PtrVT), Size);
7727  // Construct a node for the frame pointer save index.
7728  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7729  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7730  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7731  if (hasInlineStackProbe(MF))
7732  return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7733  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7734 }
7735 
7736 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7737  SelectionDAG &DAG) const {
7738  MachineFunction &MF = DAG.getMachineFunction();
7739 
7740  bool isPPC64 = Subtarget.isPPC64();
7741  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7742 
7743  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7744  return DAG.getFrameIndex(FI, PtrVT);
7745 }
7746 
7747 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7748  SelectionDAG &DAG) const {
7749  SDLoc DL(Op);
7750  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7752  Op.getOperand(0), Op.getOperand(1));
7753 }
7754 
7755 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7756  SelectionDAG &DAG) const {
7757  SDLoc DL(Op);
7759  Op.getOperand(0), Op.getOperand(1));
7760 }
7761 
7762 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7763  if (Op.getValueType().isVector())
7764  return LowerVectorLoad(Op, DAG);
7765 
7766  assert(Op.getValueType() == MVT::i1 &&
7767  "Custom lowering only for i1 loads");
7768 
7769  // First, load 8 bits into 32 bits, then truncate to 1 bit.
7770 
7771  SDLoc dl(Op);
7772  LoadSDNode *LD = cast<LoadSDNode>(Op);
7773 
7774  SDValue Chain = LD->getChain();
7775  SDValue BasePtr = LD->getBasePtr();
7776  MachineMemOperand *MMO = LD->getMemOperand();
7777 
7778  SDValue NewLD =
7779  DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7780  BasePtr, MVT::i8, MMO);
7781  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7782 
7783  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7784  return DAG.getMergeValues(Ops, dl);
7785 }
7786 
7787 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7788  if (Op.getOperand(1).getValueType().isVector())
7789  return LowerVectorStore(Op, DAG);
7790 
7791  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7792  "Custom lowering only for i1 stores");
7793 
7794  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7795 
7796  SDLoc dl(Op);
7797  StoreSDNode *ST = cast<StoreSDNode>(Op);
7798 
7799  SDValue Chain = ST->getChain();
7800  SDValue BasePtr = ST->getBasePtr();
7801  SDValue Value = ST->getValue();
7802  MachineMemOperand *MMO = ST->getMemOperand();
7803 
7805  Value);
7806  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7807 }
7808 
7809 // FIXME: Remove this once the ANDI glue bug is fixed:
7810 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7811  assert(Op.getValueType() == MVT::i1 &&
7812  "Custom lowering only for i1 results");
7813 
7814  SDLoc DL(Op);
7815  return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7816 }
7817 
7818 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7819  SelectionDAG &DAG) const {
7820 
7821  // Implements a vector truncate that fits in a vector register as a shuffle.
7822  // We want to legalize vector truncates down to where the source fits in
7823  // a vector register (and target is therefore smaller than vector register
7824  // size). At that point legalization will try to custom lower the sub-legal
7825  // result and get here - where we can contain the truncate as a single target
7826  // operation.
7827 
7828  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7829  // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7830  //
7831  // We will implement it for big-endian ordering as this (where x denotes
7832  // undefined):
7833  // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7834  // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7835  //
7836  // The same operation in little-endian ordering will be:
7837  // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7838  // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7839 
7840  EVT TrgVT = Op.getValueType();
7841  assert(TrgVT.isVector() && "Vector type expected.");
7842  unsigned TrgNumElts = TrgVT.getVectorNumElements();
7843  EVT EltVT = TrgVT.getVectorElementType();
7844  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7845  TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7846  !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
7847  return SDValue();
7848 
7849  SDValue N1 = Op.getOperand(0);
7850  EVT SrcVT = N1.getValueType();
7851  unsigned SrcSize = SrcVT.getSizeInBits();
7852  if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7853  !llvm::has_single_bit<uint32_t>(
7855  return SDValue();
7856  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7857  return SDValue();
7858 
7859  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7860  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7861 
7862  SDLoc DL(Op);
7863  SDValue Op1, Op2;
7864  if (SrcSize == 256) {
7865  EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7866  EVT SplitVT =
7868  unsigned SplitNumElts = SplitVT.getVectorNumElements();
7869  Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7870  DAG.getConstant(0, DL, VecIdxTy));
7871  Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7872  DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7873  }
7874  else {
7875  Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7876  Op2 = DAG.getUNDEF(WideVT);
7877  }
7878 
7879  // First list the elements we want to keep.
7880  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7881  SmallVector<int, 16> ShuffV;
7882  if (Subtarget.isLittleEndian())
7883  for (unsigned i = 0; i < TrgNumElts; ++i)
7884  ShuffV.push_back(i * SizeMult);
7885  else
7886  for (unsigned i = 1; i <= TrgNumElts; ++i)
7887  ShuffV.push_back(i * SizeMult - 1);
7888 
7889  // Populate the remaining elements with undefs.
7890  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7891  // ShuffV.push_back(i + WideNumElts);
7892  ShuffV.push_back(WideNumElts + 1);
7893 
7894  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7895  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7896  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7897 }
7898 
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Operands: (lhs, rhs, true-val, false-val, condcode).
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
  SDLoc dl(Op);

  // Without power9-vector, we don't have native instruction for f128 comparison.
  // Following transformation to libcall is needed for setcc:
  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
    SDValue Z = DAG.getSetCC(
        dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
        LHS, RHS, CC);
    SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
    return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
  }

  // Not FP, or using SPE? Not a fsel.
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
      Subtarget.hasSPE())
    return Op;

  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  // fsel selects on (operand >= 0), so each condcode is rewritten - via
  // operand swaps and negation - into that form.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break; // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      [[fallthrough]];
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // Equality needs two fsels: (LHS >= 0) AND (-LHS >= 0) <=> LHS == 0.
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: materialize LHS - RHS (or RHS - LHS) and fsel on its sign.
  SDValue Cmp;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    [[fallthrough]];
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    // As above: two fsels combine to test (LHS - RHS) == 0.
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}
8030 
8031 static unsigned getPPCStrictOpcode(unsigned Opc) {
8032  switch (Opc) {
8033  default:
8034  llvm_unreachable("No strict version of this opcode!");
8035  case PPCISD::FCTIDZ:
8036  return PPCISD::STRICT_FCTIDZ;
8037  case PPCISD::FCTIWZ:
8038  return PPCISD::STRICT_FCTIWZ;
8039  case PPCISD::FCTIDUZ:
8040  return PPCISD::STRICT_FCTIDUZ;
8041  case PPCISD::FCTIWUZ:
8042  return PPCISD::STRICT_FCTIWUZ;
8043  case PPCISD::FCFID:
8044  return PPCISD::STRICT_FCFID;
8045  case PPCISD::FCFIDU:
8046  return PPCISD::STRICT_FCFIDU;
8047  case PPCISD::FCFIDS:
8048  return PPCISD::STRICT_FCFIDS;
8049  case PPCISD::FCFIDUS:
8050  return PPCISD::STRICT_FCFIDUS;
8051  }
8052 }
8053 
8055  const PPCSubtarget &Subtarget) {
8056  SDLoc dl(Op);
8057  bool IsStrict = Op->isStrictFPOpcode();
8058  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8059  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8060 
8061  // TODO: Any other flags to propagate?
8062  SDNodeFlags Flags;
8063  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8064 
8065  // For strict nodes, source is the second operand.
8066  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8067  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8068  assert(Src.getValueType().isFloatingPoint());
8069  if (Src.getValueType() == MVT::f32) {
8070  if (IsStrict) {
8071  Src =
8073  DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8074  Chain = Src.getValue(1);
8075  } else
8076  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8077  }
8078  SDValue Conv;
8079  unsigned Opc = ISD::DELETED_NODE;
8080  switch (Op.getSimpleValueType().SimpleTy) {
8081  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8082  case MVT::i32:
8083  Opc = IsSigned ? PPCISD::FCTIWZ
8084  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8085  break;
8086  case MVT::i64:
8087  assert((IsSigned || Subtarget.hasFPCVT()) &&
8088  "i64 FP_TO_UINT is supported only with FPCVT");
8089  Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8090  }
8091  if (IsStrict) {
8092  Opc = getPPCStrictOpcode(Opc);
8093  Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8094  {Chain, Src}, Flags);
8095  } else {
8096  Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8097  }
8098  return Conv;
8099 }
8100 
8101 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8102  SelectionDAG &DAG,
8103  const SDLoc &dl) const {
8104  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8105  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8106  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8107  bool IsStrict = Op->isStrictFPOpcode();
8108 
8109  // Convert the FP value to an int value through memory.
8110  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8111  (IsSigned || Subtarget.hasFPCVT());
8112  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8113  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8114  MachinePointerInfo MPI =
8116 
8117  // Emit a store to the stack slot.
8118  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8119  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8120  if (i32Stack) {
8121  MachineFunction &MF = DAG.getMachineFunction();
8122  Alignment = Align(4);
8123  MachineMemOperand *MMO =
8124  MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8125  SDValue Ops[] = { Chain, Tmp, FIPtr };
8126  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8127  DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8128  } else
8129  Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8130 
8131  // Result is a load from the stack slot. If loading 4 bytes, make sure to
8132  // add in a bias on big endian.
8133  if (Op.getValueType() == MVT::i32 && !i32Stack) {
8134  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8135  DAG.getConstant(4, dl, FIPtr.getValueType()));
8136  MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8137  }
8138 
8139  RLI.Chain = Chain;
8140  RLI.Ptr = FIPtr;
8141  RLI.MPI = MPI;
8142  RLI.Alignment = Alignment;
8143 }
8144 
8145 /// Custom lowers floating point to integer conversions to use
8146 /// the direct move instructions available in ISA 2.07 to avoid the
8147 /// need for load/store combinations.
8148 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8149  SelectionDAG &DAG,
8150  const SDLoc &dl) const {
8151  SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8152  SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8153  if (Op->isStrictFPOpcode())
8154  return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8155  else
8156  return Mov;
8157 }
8158 
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  // Strict nodes carry the chain in operand 0; the FP source follows it.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT == MVT::i32) {
      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
      // set other fast-math flags to FP operations in both strict and
      // non-strict cases. (FP_TO_SINT, FSUB)
      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

      if (IsSigned) {
                               DAG.getIntPtrConstant(0, dl));
                               DAG.getIntPtrConstant(1, dl));

        // Add the two halves of the long double in round-to-zero mode, and use
        // a smaller FP_TO_SINT.
        if (IsStrict) {
          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
                                    {Op.getOperand(0), Lo, Hi}, Flags);
          return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                             {Res.getValue(1), Res}, Flags);
        } else {
          SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
          return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
        }
      } else {
        // Unsigned: offset the value below 2^31 before the signed convert,
        // then flip the sign bit back in afterwards.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
        SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
        if (IsStrict) {
          // Sel = Src < 0x80000000
          // FltOfs = select Sel, 0.0, 0x80000000
          // IntOfs = select Sel, 0, 0x80000000
          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
          SDValue Chain = Op.getOperand(0);
          EVT SetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
          EVT DstSetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
          SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                                     Chain, true);
          Chain = Sel.getValue(1);

          SDValue FltOfs = DAG.getSelect(
              dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
          Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);

          SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
                                    DAG.getVTList(SrcVT, MVT::Other),
                                    {Chain, Src, FltOfs}, Flags);
          Chain = Val.getValue(1);
          SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                                     DAG.getVTList(DstVT, MVT::Other),
                                     {Chain, Val}, Flags);
          Chain = SInt.getValue(1);
          SDValue IntOfs = DAG.getSelect(
              dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
          SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
          return DAG.getMergeValues({Result, Chain}, dl);
        } else {
          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
          // FIXME: generated code sucks.
          SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
          True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
          True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
          SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
          return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
        }
      }
    }

    // ppcf128 to anything other than i32: leave it to the default expansion.
    return SDValue();
  }

  // With ISA 2.07 direct moves the conversion can stay in registers.
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise convert through a stack slot and reload the integer result.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
8260 
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  // Conservatively skip reusing for constrained FP nodes.
  if (Op->isStrictFPOpcode())
    return false;

  SDLoc dl(Op);
  // FP_TO_UINT through memory is only usable with FPCVT, or for i32 results.
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
  if (ET == ISD::NON_EXTLOAD &&
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    // FP->int conversions materialize their result in a stack slot; reuse
    // that slot's address directly.
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  // If the result of the load is an illegal type, then we can't build a
  // valid chain for reuse since the legalised loads and token factor node that
  // ties the legalised loads together uses a different output chain then the
  // illegal load.
  if (!isTypeLegal(LD->getValueType(0)))
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    // Fold the pre-increment offset into the reused address.
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  // Copy everything needed to rebuild an equivalent load later.
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // Indexed loads produce (value, ptr, chain); plain loads (value, chain).
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
8322 
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  // Nothing to splice if the caller found no reusable chain result.
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  // The token factor is first built with an UNDEF placeholder operand; it is
  // patched to ResChain below, after ResChain's uses have been rewritten
  // (doing it in this order avoids creating a cycle).
                             NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
8343 
8344 /// Analyze profitability of direct move
8345 /// prefer float load to int load plus direct move
8346 /// when there is no integer use of int load
8347 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8348  SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8349  if (Origin->getOpcode() != ISD::LOAD)
8350  return true;
8351 
8352  // If there is no LXSIBZX/LXSIHZX, like Power8,
8353  // prefer direct move if the memory size is 1 or 2 bytes.
8354  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8355  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8356  return true;
8357 
8358  for (SDNode::use_iterator UI = Origin->use_begin(),
8359  UE = Origin->use_end();
8360  UI != UE; ++UI) {
8361 
8362  // Only look at the users of the loaded value.
8363  if (UI.getUse().get().getResNo() != 0)
8364  continue;
8365 
8366  if (UI->getOpcode() != ISD::SINT_TO_FP &&
8367  UI->getOpcode() != ISD::UINT_TO_FP &&
8368  UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8369  UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8370  return true;
8371  }
8372 
8373  return false;
8374 }
8375 
                             const PPCSubtarget &Subtarget,
                             SDValue Chain = SDValue()) {
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  SDLoc dl(Op);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
                              : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
  if (Op->isStrictFPOpcode()) {
    // Strict nodes thread a chain; default to Op's incoming chain when the
    // caller did not supply one.
    if (!Chain)
      Chain = Op.getOperand(0);
    return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
                       DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
  } else
    return DAG.getNode(ConvOpc, dl, ConvTy, Src);
}
8401 
8402 /// Custom lowers integer to floating point conversions to use
8403 /// the direct move instructions available in ISA 2.07 to avoid the
8404 /// need for load/store combinations.
8405 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8406  SelectionDAG &DAG,
8407  const SDLoc &dl) const {
8408  assert((Op.getValueType() == MVT::f32 ||
8409  Op.getValueType() == MVT::f64) &&
8410  "Invalid floating point type as target of conversion");
8411  assert(Subtarget.hasFPCVT() &&
8412  "Int to FP conversions with direct moves require FPCVT");
8413  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8414  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8415  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8416  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8417  unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8418  SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8419  return convertIntToFP(Op, Mov, DAG, Subtarget);
8420 }
8421 
8422 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8423 
8424  EVT VecVT = Vec.getValueType();
8425  assert(VecVT.isVector() && "Expected a vector type.");
8426  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8427 
8428  EVT EltVT = VecVT.getVectorElementType();
8429  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8430  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8431 
8432  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8433  SmallVector<SDValue, 16> Ops(NumConcat);
8434  Ops[0] = Vec;
8435  SDValue UndefVec = DAG.getUNDEF(VecVT);
8436  for (unsigned i = 1; i < NumConcat; ++i)
8437  Ops[i] = UndefVec;
8438 
8439  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8440 }
8441 
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  // Widen the (sub-128-bit) source up to a full vector register.
  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Default every mask slot to an element of the second shuffle operand...
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  // ...then place the source elements at the lane positions the
  // endian-dependent conversion expects.
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // For unsigned conversions shuffle in zeros so the high parts of each
  // element are zero-extended; for signed ones they are sign-extended below.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

  if (IsStrict)
    return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
                       {Op.getOperand(0), Extend}, Flags);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
8501 
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  // Strict nodes carry the chain in operand 0; the integer source follows it.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // i1 converts exactly: just select between 1.0 and 0.0.
  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
    if (IsStrict)
      return DAG.getMergeValues({Sel, Chain}, dl);
    else
      return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand. Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero. (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output. Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already. Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    // Try hard to get the i64 bit pattern into an FPR without a GPR->FPR
    // move: reuse the address of an existing load where possible, otherwise
    // go through a stack slot; plain bitcast is the last resort.
    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
          RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
          RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // Extension of an i32: spill the narrow source and reload it with the
      // matching extending FP load (LFIWAX/LFIWZX).
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                                   DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
          RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    // Without FPCVT the conversion produced f64; round it to f32 now.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                         {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers. In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    // Reuse an existing load's address when possible; otherwise spill the
    // i32 to a fresh stack slot first.
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                                   DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
        RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  // Without FPCVT the conversion produced f64; round it to f32 now.
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
  }
  return FP;
}
8759 
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  SDValue CWD;
  if (isTypeLegal(MVT::i64)) {
    // With 64-bit GPRs the FPSCR image can be bitcast and truncated directly.
    CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(4, dl, PtrVT);
    SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
    CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
    Chain = CWD.getValue(1);
  }

  // Transform as necessary: CWD1 = FPSCR & 3, CWD2 = ((~FPSCR & 3) >> 1).
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Adjust the i32 result to the requested result type.
  RetVal =
               dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}
8831 
8832 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8833  EVT VT = Op.getValueType();
8834  unsigned BitWidth = VT.getSizeInBits();
8835  SDLoc dl(Op);
8836  assert(Op.getNumOperands() == 3 &&
8837  VT == Op.getOperand(1).getValueType() &&
8838  "Unexpected SHL!");
8839 
8840  // Expand into a bunch of logical ops. Note that these ops
8841  // depend on the PPC behavior for oversized shift amounts.
8842  SDValue Lo = Op.getOperand(0);
8843  SDValue Hi = Op.getOperand(1);
8844  SDValue Amt = Op.getOperand(2);
8845  EVT AmtVT = Amt.getValueType();
8846 
8847  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8848  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8849  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8850  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8851  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8852  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8853  DAG.getConstant(-BitWidth, dl, AmtVT));
8854  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8855  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8856  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8857  SDValue OutOps[] = { OutLo, OutHi };
8858  return DAG.getMergeValues(OutOps, dl);
8859 }
8860 
8861 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8862  EVT VT = Op.getValueType();
8863  SDLoc dl(Op);
8864  unsigned BitWidth = VT.getSizeInBits();
8865  assert(Op.getNumOperands() == 3 &&
8866  VT == Op.getOperand(1).getValueType() &&
8867  "Unexpected SRL!");
8868 
8869  // Expand into a bunch of logical ops. Note that these ops
8870  // depend on the PPC behavior for oversized shift amounts.
8871  SDValue Lo = Op.getOperand(0);
8872  SDValue Hi = Op.getOperand(1);
8873  SDValue Amt = Op.getOperand(2);
8874  EVT AmtVT = Amt.getValueType();
8875 
8876  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8877  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8878  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8879  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8880  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8881  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8882  DAG.getConstant(-BitWidth, dl, AmtVT));
8883  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8884  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8885  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8886  SDValue OutOps[] = { OutLo, OutHi };
8887  return DAG.getMergeValues(OutOps, dl);
8888 }
8889 
8890 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8891  SDLoc dl(Op);
8892  EVT VT = Op.getValueType();
8893  unsigned BitWidth = VT.getSizeInBits();
8894  assert(Op.getNumOperands() == 3 &&
8895  VT == Op.getOperand(1).getValueType() &&
8896  "Unexpected SRA!");
8897 
8898  // Expand into a bunch of logical ops, followed by a select_cc.
8899  SDValue Lo = Op.getOperand(0);
8900  SDValue Hi = Op.getOperand(1);
8901  SDValue Amt = Op.getOperand(2);
8902  EVT AmtVT = Amt.getValueType();
8903 
8904  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8905  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8906  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8907  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8908  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8909  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8910  DAG.getConstant(-BitWidth, dl, AmtVT));
8911  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8912  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8913  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8914  Tmp4, Tmp6, ISD::SETLE);
8915  SDValue OutOps[] = { OutLo, OutHi };
8916  return DAG.getMergeValues(OutOps, dl);
8917 }
8918 
8919 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8920  SelectionDAG &DAG) const {
8921  SDLoc dl(Op);
8922  EVT VT = Op.getValueType();
8923  unsigned BitWidth = VT.getSizeInBits();
8924 
8925  bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8926  SDValue X = Op.getOperand(0);
8927  SDValue Y = Op.getOperand(1);
8928  SDValue Z = Op.getOperand(2);
8929  EVT AmtVT = Z.getValueType();
8930 
8931  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8932  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8933  // This is simpler than TargetLowering::expandFunnelShift because we can rely
8934  // on PowerPC shift by BW being well defined.
8935  Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8936  DAG.getConstant(BitWidth - 1, dl, AmtVT));
8937  SDValue SubZ =
8938  DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8939  X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8940  Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8941  return DAG.getNode(ISD::OR, dl, VT, X, Y);
8942 }
8943 
8944 //===----------------------------------------------------------------------===//
8945 // Vector related lowering.
8946 //
8947 
/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
/// element size of SplatSize. Cast the result to VT.
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
                                      SelectionDAG &DAG, const SDLoc &dl) {
  static const MVT VTys[] = { // canonical VT to use for each size.
  };

  // VT == MVT::Other means "use the canonical type for SplatSize".
  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
    SplatSize = 1;
    Val = 0xFF;
  }

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}
8969 
8970 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8971 /// specified intrinsic ID.
8972 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8973  const SDLoc &dl, EVT DestVT = MVT::Other) {
8974  if (DestVT == MVT::Other) DestVT = Op.getValueType();
8975  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8976  DAG.getConstant(IID, dl, MVT::i32), Op);
8977 }
8978 
8979 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8980 /// specified intrinsic ID.
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  // Default the result type to the type of the LHS operand.
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  // The intrinsic ID is passed as the first operand of INTRINSIC_WO_CHAIN.
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}
8988 
8989 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8990 /// specified intrinsic ID.
8991 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8992  SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8993  EVT DestVT = MVT::Other) {
8994  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8995  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8996  DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8997 }
8998 
8999 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9000 /// amount. The result has the specified value type.
9001 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9002  SelectionDAG &DAG, const SDLoc &dl) {
9003  // Force LHS/RHS to be the right type.
9004  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9005  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9006 
9007  int Ops[16];
9008  for (unsigned i = 0; i != 16; ++i)
9009  Ops[i] = i + Amt;
9010  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9011  return DAG.getNode(ISD::BITCAST, dl, VT, T);
9012 }
9013 
9014 /// Do we have an efficient pattern in a .td file for this node?
9015 ///
9016 /// \param V - pointer to the BuildVectorSDNode being matched
9017 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9018 ///
9019 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9020 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9021 /// the opposite is true (expansion is beneficial) are:
9022 /// - The node builds a vector out of integers that are not 32 or 64-bits
9023 /// - The node builds a vector out of constants
9024 /// - The node is a "load-and-splat"
9025 /// In all other cases, we will choose to keep the BUILD_VECTOR.
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  // Only these vector types have efficient BUILD_VECTOR patterns on the
  // given subtarget feature combination.
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a constant
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
  // different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  // Expansion is only beneficial for a splat of loaded values; keep the
  // BUILD_VECTOR in every other case.
  return !(IsSplat && IsLoad);
}
9066 
9067 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9068 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9069 
9070  SDLoc dl(Op);
9071  SDValue Op0 = Op->getOperand(0);
9072 
9073  if ((Op.getValueType() != MVT::f128) ||
9074  (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9075  (Op0.getOperand(0).getValueType() != MVT::i64) ||
9076  (Op0.getOperand(1).getValueType() != MVT::i64))
9077  return SDValue();
9078 
9079  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9080  Op0.getOperand(1));
9081 }
9082 
9083 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9084  const SDValue *InputLoad = &Op;
9085  while (InputLoad->getOpcode() == ISD::BITCAST)
9086  InputLoad = &InputLoad->getOperand(0);
9087  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9088  InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9089  IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9090  InputLoad = &InputLoad->getOperand(0);
9091  }
9092  if (InputLoad->getOpcode() != ISD::LOAD)
9093  return nullptr;
9094  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9095  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9096 }
9097 
9098 // Convert the argument APFloat to a single precision APFloat if there is no
9099 // loss in information during the conversion to single precision APFloat and the
9100 // resulting number is not a denormal number. Return true if successful.
  // Work on a copy so the argument is only modified on success.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
                   &LosesInfo);
  // Succeed only if the narrowing was exact and did not produce a denormal.
  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
  if (Success)
    ArgAPFloat = APFloatToConvert;
  return Success;
}
9111 
9112 // Bitcast the argument APInt to a double and convert it to a single precision
9113 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9114 // argument if there is no loss in information during the conversion from
9115 // double to single precision APFloat and the resulting number is not a denormal
9116 // number. Return true if successful.
  // Reinterpret the raw bits as a double and reuse the APFloat-based helper
  // to perform the actual narrowing check.
  double DpValue = ArgAPInt.bitsToDouble();
  APFloat APFloatDp(DpValue);
  bool Success = convertToNonDenormSingle(APFloatDp);
  // Only write back the narrowed bit pattern when the conversion was clean.
  if (Success)
    ArgAPInt = APFloatDp.bitcastToAPInt();
  return Success;
}
9125 
// Nondestructive check for convertToNonDenormSingle.
  // Only convert if it loses info, since XXSPLTIDP should
  // handle the other case.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
                   &LosesInfo);

  // Report convertibility without modifying the argument.
  return (!LosesInfo && !APFloatToConvert.isDenormal());
}
9137 
9138 static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9139  unsigned &Opcode) {
9140  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9141  if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9142  return false;
9143 
9144  EVT Ty = Op->getValueType(0);
9145  // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9146  // as we cannot handle extending loads for these types.
9147  if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9148  ISD::isNON_EXTLoad(InputNode))
9149  return true;
9150 
9151  EVT MemVT = InputNode->getMemoryVT();
9152  // For v8i16 and v16i8 types, extending loads can be handled as long as the
9153  // memory VT is the same vector element VT type.
9154  // The loads feeding into the v8i16 and v16i8 types will be extending because
9155  // scalar i8/i16 are not legal types.
9156  if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9157  (MemVT == Ty.getVectorElementType()))
9158  return true;
9159 
9160  if (Ty == MVT::v2i64) {
9161  // Check the extend type, when the input type is i32, and the output vector
9162  // type is v2i64.
9163  if (MemVT == MVT::i32) {
9164  if (ISD::isZEXTLoad(InputNode))
9165  Opcode = PPCISD::ZEXT_LD_SPLAT;
9166  if (ISD::isSEXTLoad(InputNode))
9167  Opcode = PPCISD::SEXT_LD_SPLAT;
9168  }
9169  return true;
9170  }
9171  return false;
9172 }
9173 
9174 // If this is a case we can't handle, return null and let the default
9175 // expansion code take care of it. If we CAN select this case, and if it
9176 // selects to a single instruction, return Op. Otherwise, if we can codegen
9177 // this case more efficiently than a constant pool load, lower it to the
9178 // sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  // Element order for splat matching depends on endianness, hence the last
  // argument.
  bool BVNIsConstantSplat =
      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());

  // If it is a splat of a double, check if we can shrink it to a 32 bit
  // non-denormal float which when converted back to double gives us the same
  // double. This is to exploit the XXSPLTIDP instruction.
  // If we lose precision, we use XXSPLTI32DX.
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
      Subtarget.hasPrefixInstrs()) {
    // Check the type first to short-circuit so we don't modify APSplatBits if
    // this block isn't executed.
    if ((Op->getValueType(0) == MVT::v2f64) &&
        convertToNonDenormSingle(APSplatBits)) {
      SDValue SplatNode = DAG.getNode(
          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
      return DAG.getBitcast(Op.getValueType(), SplatNode);
    } else {
      // We may lose precision, so we have to use XXSPLTI32DX.

      uint32_t Hi =
          (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
      uint32_t Lo =
          (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
      SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);

      if (!Hi || !Lo)
        // If either load is 0, then we should generate XXLXOR to set to 0.
        SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);

      // Insert the non-zero halves one 32-bit doubleword-element at a time.
      if (Hi)
        SplatNode = DAG.getNode(
            PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
            DAG.getTargetConstant(0, dl, MVT::i32),
            DAG.getTargetConstant(Hi, dl, MVT::i32));

      if (Lo)
        SplatNode =
            DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
                        DAG.getTargetConstant(1, dl, MVT::i32),
                        DAG.getTargetConstant(Lo, dl, MVT::i32));

      return DAG.getBitcast(Op.getValueType(), SplatNode);
    }
  }

  if (!BVNIsConstantSplat || SplatBitSize > 32) {
    unsigned NewOpcode = PPCISD::LD_SPLAT;

    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (DAG.isSplatValue(Op, true) &&
        isValidSplatLoad(Subtarget, Op, NewOpcode)) {
      const SDValue *InputLoad = &Op.getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

      // If the input load is an extending load, it will be an i32 -> i64
      // extending load and isValidSplatLoad() will update NewOpcode.
      unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
      unsigned ElementSize =
          MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);

      assert(((ElementSize == 2 * MemorySize)
                  ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
                     NewOpcode == PPCISD::SEXT_LD_SPLAT)
                  : (NewOpcode == PPCISD::LD_SPLAT)) &&
             "Unmatched element size and opcode!\n");

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value.)
      unsigned NumUsesOfInputLD = 128 / ElementSize;
      for (SDValue BVInOp : Op->ops())
        if (BVInOp.isUndef())
          NumUsesOfInputLD--;

      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
      // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
      // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
      // 15", but function IsValidSplatLoad() now will only return true when
      // the data at index 0 is not nullptr. So we will not get into trouble for
      // these cases.
      //
      // case 1 - lfiwzx/lfiwax
      // 1.1: load result is i32 and is sign/zero extend to i64;
      // 1.2: build a v2i64 vector type with above loaded value;
      // 1.3: the vector has only one value at index 0, others are all undef;
      // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
      if (NumUsesOfInputLD == 1 &&
          (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
           !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
           Subtarget.hasLFIWAX()))
        return SDValue();

      // case 2 - lxvr[hb]x
      // 2.1: load result is at most i16;
      // 2.2: build a vector with above loaded value;
      // 2.3: the vector has only one value at index 0, others are all undef;
      // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
      if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
          Subtarget.isISA3_1() && ElementSize <= 16)
        return SDValue();

      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
      if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
          Subtarget.hasVSX()) {
        SDValue Ops[] = {
          LD->getChain(),                     // Chain
          LD->getBasePtr(),                   // Ptr
          DAG.getValueType(Op.getValueType()) // VT
        };
        SDValue LdSplt = DAG.getMemIntrinsicNode(
            NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
            LD->getMemoryVT(), LD->getMemOperand());
        // Replace all uses of the output chain of the original load with the
        // output chain of the new load.
        DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
                                      LdSplt.getValue(1));
        return LdSplt;
      }
    }

    // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
    // 32-bits can be lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  uint64_t SplatBits = APSplatBits.getZExtValue();
  uint64_t SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIW for constant splats four bytes wide.
  // Given vector length is a multiple of 4, 2-byte splats can be replaced
  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
  // turned into a 4-byte splat of 0xABABABAB.
  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
    return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
                                  Op.getValueType(), DAG, dl);

  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // We have XXSPLTIB for constant splats one byte wide.
  if (Subtarget.hasP9Vector() && SplatSize == 1)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}
9474 
9475 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9476 /// the specified operations to build the shuffle.
9477 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9478  SDValue RHS, SelectionDAG &DAG,
9479  const SDLoc &dl) {
9480  unsigned OpNum = (PFEntry >> 26) & 0x0F;
9481  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9482  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9483 
9484  enum {
9485  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9486  OP_VMRGHW,
9487  OP_VMRGLW,
9488  OP_VSPLTISW0,
9489  OP_VSPLTISW1,
9490  OP_VSPLTISW2,
9491  OP_VSPLTISW3,
9492  OP_VSLDOI4,
9493  OP_VSLDOI8,
9494  OP_VSLDOI12
9495  };
9496 
9497  if (OpNum == OP_COPY) {
9498  if (LHSID == (1*9+2)*9+3) return LHS;
9499  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9500  return RHS;
9501  }
9502 
9503  SDValue OpLHS, OpRHS;
9504  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9505  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9506 
9507  int ShufIdxs[16];
9508  switch (OpNum) {
9509  default: llvm_unreachable("Unknown i32 permute!");
9510  case OP_VMRGHW:
9511  ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9512  ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9513  ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9514  ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9515  break;
9516  case OP_VMRGLW:
9517  ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9518  ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9519  ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9520  ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9521  break;
9522  case OP_VSPLTISW0:
9523  for (unsigned i = 0; i != 16; ++i)
9524  ShufIdxs[i] = (i&3)+0;
9525  break;
9526  case OP_VSPLTISW1:
9527  for (unsigned i = 0; i != 16; ++i)
9528  ShufIdxs[i] = (i&3)+4;
9529  break;
9530  case OP_VSPLTISW2:
9531  for (unsigned i = 0; i != 16; ++i)
9532  ShufIdxs[i] = (i&3)+8;
9533  break;
9534  case OP_VSPLTISW3:
9535  for (unsigned i = 0; i != 16; ++i)
9536  ShufIdxs[i] = (i&3)+12;
9537  break;
9538  case OP_VSLDOI4:
9539  return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9540  case OP_VSLDOI8:
9541  return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9542  case OP_VSLDOI12:
9543  return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9544  }
9545  EVT VT = OpLHS.getValueType();
9546  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9547  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9548  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9549  return DAG.getNode(ISD::BITCAST, dl, VT, T);
9550 }
9551 
9552 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9553 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9554 /// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for the VINSERTB
    // source element (7 on BE, 8 on LE) in the Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source so the byte to insert lands in the fixed source slot.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
9652 
9653 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9654 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9655 /// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // All nibbles of the packed mask except the one at position i.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
9764 
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
/// return the default SDValue.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
                                              SelectionDAG &DAG) const {
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
  // to v16i8. Peek through the bitcasts to get the actual operands.
  // NOTE(review): the declarations of LHS/RHS (the shuffle operands after
  // peeking through bitcasts) appear to be missing from this excerpt --
  // confirm against the full source.

  auto ShuffleMask = SVN->getMask();
  SDValue VecShuffle(SVN, 0);
  SDLoc DL(SVN);

  // Check that we have a four byte shuffle.
  if (!isNByteElemShuffleMask(SVN, 4, 1))
    return SDValue();

  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
    std::swap(LHS, RHS);
    // NOTE(review): a line recomputing VecShuffle as the commuted shuffle
    // appears to be missing from this excerpt -- confirm against the full
    // source.
    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
    if (!CommutedSV)
      return SDValue();
    ShuffleMask = CommutedSV->getMask();
  }

  // Ensure that the RHS is a vector of constants.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
  if (!BVN)
    return SDValue();

  // Check if RHS is a splat of 4-bytes (or smaller).
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
  // The instruction splats a constant C into two words of the source vector
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
  // Thus we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
  // within each word are consecutive, so we only need to check the first byte.
  SDValue Index;
  bool IsLE = Subtarget.isLittleEndian();
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
    Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
    Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
  else
    return SDValue();

  // If the splat is narrower than 32-bits, we need to get the 32-bit value
  // for XXSPLTI32DX.
  // Widen by repeated doubling, e.g. an 8-bit splat 0xAB becomes 0xABABABAB.
  unsigned SplatVal = APSplatValue.getZExtValue();
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);

  SDValue SplatNode = DAG.getNode(
      // NOTE(review): the opcode/type/first-operand line of this getNode call
      // appears to be missing from this excerpt -- confirm against the full
      // source.
      Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
}
9838 
/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
/// i.e (or (shl x, C1), (srl x, 128-C1)).
SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
  assert(Op.getValueType() == MVT::v1i128 &&
         "Only set v1i128 as custom, other type shouldn't reach here!");
  SDLoc dl(Op);
  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
  // The rotate amount is operand 0 of the (single-element) amount vector.
  unsigned SHLAmt = N1.getConstantOperandVal(0);
  if (SHLAmt % 8 == 0) {
    // A rotate by a whole number of bytes is a byte-rotation of the identity
    // mask <0..15>, expressible as a v16i8 shuffle.
    std::array<int, 16> Mask;
    std::iota(Mask.begin(), Mask.end(), 0);
    std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
    if (SDValue Shuffle =
            // NOTE(review): the start of this getVectorShuffle call appears
            // to be missing from this excerpt -- confirm against the full
            // source.
            DAG.getUNDEF(MVT::v16i8), Mask))
      return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
  }
  // Fall back to a scalar i128 rotate: (or (shl x, C1), (srl x, 128-C1)).
  SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
  SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
                              DAG.getConstant(SHLAmt, dl, MVT::i32));
  SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
                              DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
  SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
}
9868 
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
///
/// The lowering tries, in order: load-and-splat, XXINSERTW, XXSPLTI32DX,
/// VINSERTH/VINSERTB, XXSLDWI, XXPERMDI, XXBR* byte-reverses, XXSPLT/swap,
/// immediate-form Altivec shuffles (left as VECTOR_SHUFFLE for isel), the
/// perfect-shuffle table (big endian only), and finally VPERM.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);

  // Any nodes that were combined in the target-independent combiner prior
  // to vector legalization will not be sent to the target combine. Try to
  // combine it here.
  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
    // If the combine produced something other than a shuffle, we are done.
    if (!isa<ShuffleVectorSDNode>(NewShuffle))
      return NewShuffle;
    // Otherwise continue lowering, but on the combined shuffle.
    Op = NewShuffle;
    SVOp = cast<ShuffleVectorSDNode>(Op);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
  }
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However if the load has multiple uses, we don't want to
  // combine it because that will just produce multiple loads.
  bool IsPermutedLoad = false;
  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
      InputLoad->hasOneUse()) {
    bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
    int SplatIdx =
        PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

    // The splat index for permuted loads will be in the left half of the vector
    // which is strictly wider than the loaded value by 8 bytes. So we need to
    // adjust the splat index to point to the correct address in memory.
    if (IsPermutedLoad) {
      assert((isLittleEndian || IsFourByte) &&
             "Unexpected size for permuted load on big endian target");
      SplatIdx += IsFourByte ? 2 : 1;
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
             "Splat of a value outside of the loaded memory");
    }

    LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
    // For 4-byte load-and-splat, we need Power9.
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      uint64_t Offset = 0;
      // Convert the splat index into a byte offset from the load address,
      // accounting for endianness.
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

      // If the width of the load is the same as the width of the splat,
      // loading with an offset would load the wrong memory.
      if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
        Offset = 0;

      SDValue BasePtr = LD->getBasePtr();
      if (Offset != 0)
        // NOTE(review): the start of the address computation (an ISD::ADD
        // getNode call assigning to BasePtr) appears to be missing from this
        // excerpt -- confirm against the full source.
        BasePtr, DAG.getIntPtrConstant(Offset, dl));
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        BasePtr,           // BasePtr
        DAG.getValueType(Op.getValueType()) // VT
      };
      SDVTList VTL =
          DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
      SDValue LdSplt =
          // NOTE(review): the start of this memory-intrinsic node creation
          // appears to be missing from this excerpt -- confirm against the
          // full source.
          Ops, LD->getMemoryVT(), LD->getMemOperand());
      // Re-route the original load's chain users to the new splatting load.
      DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
      if (LdSplt.getValueType() != SVOp->getValueType(0))
        LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
      return LdSplt;
    }
  }

  // All v2i64 and v2f64 shuffles are legal
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return Op;

  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasPrefixInstrs()) {
    SDValue SplatInsertNode;
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
      return SplatInsertNode;
  }

  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  // Byte-reversal shuffles map to the XXBR{H,W,D,Q} family: bitcast to the
  // element width being reversed and use vector BSWAP.
  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation. If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  if (!DisablePerfectShuffle && !isLittleEndian) {
    unsigned PFIndexes[4];
    bool isFourElementShuffle = true;
    for (unsigned i = 0; i != 4 && isFourElementShuffle;
         ++i) { // Element number
      unsigned EltNo = 8; // Start out undef.
      for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
        if (PermMask[i * 4 + j] < 0)
          continue; // Undef, ignore it.

        unsigned ByteSource = PermMask[i * 4 + j];
        // All four bytes of an element must come consecutively from one
        // 4-byte source element; otherwise this is not a word shuffle.
        if ((ByteSource & 3) != j) {
          isFourElementShuffle = false;
          break;
        }

        if (EltNo == 8) {
          EltNo = ByteSource / 4;
        } else if (EltNo != ByteSource / 4) {
          isFourElementShuffle = false;
          break;
        }
      }
      PFIndexes[i] = EltNo;
    }

    // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
    // perfect shuffle vector to determine if it is cost effective to do this as
    // discrete instructions, or whether we should use a vperm.
    // For now, we skip this for little endian until such time as we have a
    // little-endian perfect shuffle table.
    if (isFourElementShuffle) {
      // Compute the index in the perfect shuffle table.
      unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
                              PFIndexes[2] * 9 + PFIndexes[3];

      unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
      unsigned Cost = (PFEntry >> 30);

      // Determining when to avoid vperm is tricky. Many things affect the cost
      // of vperm, particularly how many times the perm mask needs to be
      // computed. For example, if the perm mask can be hoisted out of a loop or
      // is already used (perhaps because there are multiple permutes with the
      // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
      // permute mask out of the loop requires an extra register.
      //
      // As a compromise, we only emit discrete instructions if the shuffle can
      // be generated in 3 or fewer operations. When we have loop information
      // available, if this block is within a loop, we should avoid using vperm
      // for 3-operation perms and use a constant pool load instead.
      if (Cost < 3)
        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
    }
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
}
10165 
10166 SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10167  ArrayRef<int> PermMask, EVT VT,
10168  SDValue V1, SDValue V2) const {
10169  unsigned Opcode = PPCISD::VPERM;
10170  EVT ValType = V1.getValueType();
10171  SDLoc dl(Op);
10172  bool NeedSwap = false;
10173  bool isLittleEndian = Subtarget.isLittleEndian();
10174  bool isPPC64 = Subtarget.isPPC64();
10175 
10176  // Only need to place items backwards in LE,
10177  // the mask will be properly calculated.
10178  if (isLittleEndian)
10179  std::swap(V1, V2);
10180 
10181  if (Subtarget.isISA3_0() && (V1->hasOneUse() || V2->hasOneUse())) {
10182  LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10183  "XXPERM instead\n");
10184  Opcode = PPCISD::XXPERM;
10185 
10186  // if V2 is dead, then we swap V1 and V2 so we can
10187  // use V2 as the destination instead.
10188  if (!V1->hasOneUse() && V2->hasOneUse()) {
10189  std::swap(V1, V2);
10190  NeedSwap = !NeedSwap;
10191  }
10192  }
10193 
10194  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10195  // that it is in input element units, not in bytes. Convert now.
10196 
10197  // For little endian, the order of the input vectors is reversed, and
10198  // the permutation mask is complemented with respect to 31. This is
10199  // necessary to produce proper semantics with the big-endian-based vperm
10200  // instruction.
10201  EVT EltVT = V1.getValueType().getVectorElementType();
10202  unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10203 
10204  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10205  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10206 
10207  /*
10208  Vectors will be appended like so: [ V1 | v2 ]
10209  XXSWAPD on V1:
10210  [ A | B | C | D ] -> [ C | D | A | B ]
10211  0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10212  i.e. index of A, B += 8, and index of C, D -= 8.
10213  XXSWAPD on V2:
10214  [ E | F | G | H ] -> [ G | H | E | F ]
10215  16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10216  i.e. index of E, F += 8, index of G, H -= 8
10217  Swap V1 and V2:
10218  [ V1 | V2 ] -> [ V2 | V1 ]
10219  0-15 16-31 0-15 16-31
10220  i.e. index of V1 += 16, index of V2 -= 16
10221  */
10222 
10223  SmallVector<SDValue, 16> ResultMask;
10224  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10225  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10226 
10227  if (Opcode == PPCISD::XXPERM) {
10228  if (V1HasXXSWAPD) {
10229  if (SrcElt < 8)
10230  SrcElt += 8;
10231  else if (SrcElt < 16)
10232  SrcElt -= 8;
10233  }
10234  if (V2HasXXSWAPD) {
10235  if (SrcElt > 23)
10236  SrcElt -= 8;
10237  else if (SrcElt > 15)
10238  SrcElt += 8;
10239  }
10240  if (NeedSwap) {
10241  if (SrcElt < 16)
10242  SrcElt += 16;
10243  else
10244  SrcElt -= 16;
10245  }
10246  }
10247 
10248  for (unsigned j = 0; j != BytesPerElement; ++j)
10249  if (isLittleEndian)
10250  ResultMask.push_back(
10251  DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10252  else
10253  ResultMask.push_back(
10254  DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10255  }
10256 
10257  if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10258  if (V1HasXXSWAPD) {
10259  dl = SDLoc(V1->getOperand(0));
10260  V1 = V1->getOperand(0)->getOperand(1);
10261  }
10262  if (V2HasXXSWAPD) {
10263  dl = SDLoc(V2->getOperand(0));
10264  V2 = V2->getOperand(0)->getOperand(1);
10265  }
10266  if (isPPC64 && ValType != MVT::v2f64)
10267  V1 = DAG.getBitcast(MVT::v2f64, V1);
10268  if (isPPC64 && V2.getValueType() != MVT::v2f64)
10269  V2 = DAG.getBitcast(MVT::v2f64, V2);
10270  }
10271 
10272  ShufflesHandledWithVPERM++;
10273  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10274  LLVM_DEBUG({
10275  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10276  if (Opcode == PPCISD::XXPERM) {
10277  dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10278  } else {
10279  dbgs() << "Emitting a VPERM for the following shuffle:\n";
10280  }
10281  SVOp->dump();
10282  dbgs() << "With the following permute control vector:\n";
10283  VPermMask.dump();
10284  });
10285 
10286  if (Opcode == PPCISD::XXPERM)
10287  VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10288 
10289  SDValue VPERMNode =
10290  DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10291 
10292  VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10293  return VPERMNode;
10294 }
10295 
10296 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10297 /// vector comparison. If it is, return true and fill in Opc/isDot with
10298 /// information about the intrinsic.
10299 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10300  bool &isDot, const PPCSubtarget &Subtarget) {
10301  unsigned IntrinsicID =
10302  cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10303  CompareOpc = -1;
10304  isDot = false;
10305  switch (IntrinsicID) {
10306  default:
10307  return false;
10308  // Comparison predicates.
10309  case Intrinsic::ppc_altivec_vcmpbfp_p:
10310  CompareOpc = 966;
10311  isDot = true;
10312  break;
10313  case Intrinsic::ppc_altivec_vcmpeqfp_p:
10314  CompareOpc = 198;
10315  isDot = true;
10316  break;
10317  case Intrinsic::ppc_altivec_vcmpequb_p:
10318  CompareOpc = 6;
10319  isDot = true;
10320  break;
10321  case Intrinsic::ppc_altivec_vcmpequh_p:
10322  CompareOpc = 70;
10323  isDot = true;
10324  break;
10325  case Intrinsic::ppc_altivec_vcmpequw_p:
10326  CompareOpc = 134;
10327  isDot = true;
10328  break;
10329  case Intrinsic::ppc_altivec_vcmpequd_p:
10330  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10331  CompareOpc = 199;
10332  isDot = true;
10333  } else
10334  return false;
10335  break;
10336  case Intrinsic::ppc_altivec_vcmpneb_p:
10337  case Intrinsic::ppc_altivec_vcmpneh_p:
10338  case Intrinsic::ppc_altivec_vcmpnew_p:
10339  case Intrinsic::ppc_altivec_vcmpnezb_p:
10340  case Intrinsic::ppc_altivec_vcmpnezh_p:
10341  case Intrinsic::ppc_altivec_vcmpnezw_p:
10342  if (Subtarget.hasP9Altivec()) {
10343  switch (IntrinsicID) {
10344  default:
10345  llvm_unreachable("Unknown comparison intrinsic.");
10346  case Intrinsic::ppc_altivec_vcmpneb_p:
10347  CompareOpc = 7;
10348  break;
10349  case Intrinsic::ppc_altivec_vcmpneh_p:
10350  CompareOpc = 71;
10351  break;
10352  case Intrinsic::ppc_altivec_vcmpnew_p:
10353  CompareOpc = 135;
10354  break;
10355  case Intrinsic::ppc_altivec_vcmpnezb_p:
10356  CompareOpc = 263;
10357  break;
10358  case Intrinsic::ppc_altivec_vcmpnezh_p:
10359  CompareOpc = 327;
10360  break;
10361  case Intrinsic::ppc_altivec_vcmpnezw_p:
10362  CompareOpc = 391;
10363  break;
10364  }
10365  isDot = true;
10366  } else
10367  return false;
10368  break;
10369  case Intrinsic::ppc_altivec_vcmpgefp_p:
10370  CompareOpc = 454;
10371  isDot = true;
10372  break;
10373  case Intrinsic::ppc_altivec_vcmpgtfp_p:
10374  CompareOpc = 710;
10375  isDot = true;
10376  break;
10377  case Intrinsic::ppc_altivec_vcmpgtsb_p:
10378  CompareOpc = 774;
10379  isDot = true;
10380  break;
10381  case Intrinsic::ppc_altivec_vcmpgtsh_p:
10382  CompareOpc = 838;
10383  isDot = true;
10384  break;
10385  case Intrinsic::ppc_altivec_vcmpgtsw_p:
10386  CompareOpc = 902;
10387  isDot = true;
10388  break;
10389  case Intrinsic::ppc_altivec_vcmpgtsd_p:
10390  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10391  CompareOpc = 967;
10392  isDot = true;
10393  } else
10394  return false;
10395  break;
10396  case Intrinsic::ppc_altivec_vcmpgtub_p:
10397  CompareOpc = 518;
10398  isDot = true;
10399  break;
10400  case Intrinsic::ppc_altivec_vcmpgtuh_p:
10401  CompareOpc = 582;
10402  isDot = true;
10403  break;
10404  case Intrinsic::ppc_altivec_vcmpgtuw_p:
10405  CompareOpc = 646;
10406  isDot = true;
10407  break;
10408  case Intrinsic::ppc_altivec_vcmpgtud_p:
10409  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10410  CompareOpc = 711;
10411  isDot = true;
10412  } else
10413  return false;
10414  break;
10415 
10416  case Intrinsic::ppc_altivec_vcmpequq:
10417  case Intrinsic::ppc_altivec_vcmpgtsq:
10418  case Intrinsic::ppc_altivec_vcmpgtuq:
10419  if (!Subtarget.isISA3_1())
10420  return false;
10421  switch (IntrinsicID) {
10422  default:
10423  llvm_unreachable("Unknown comparison intrinsic.");
10424  case Intrinsic::ppc_altivec_vcmpequq:
10425  CompareOpc = 455;
10426  break;
10427  case Intrinsic::ppc_altivec_vcmpgtsq:
10428  CompareOpc = 903;
10429  break;
10430  case Intrinsic::ppc_altivec_vcmpgtuq:
10431  CompareOpc = 647;
10432  break;
10433  }
10434  break;
10435 
10436  // VSX predicate comparisons use the same infrastructure
10437  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10438  case Intrinsic::ppc_vsx_xvcmpgedp_p:
10439  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10440  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10441  case Intrinsic::ppc_vsx_xvcmpgesp_p:
10442  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10443  if (Subtarget.hasVSX()) {
10444  switch (IntrinsicID) {
10445  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10446  CompareOpc = 99;
10447  break;
10448  case Intrinsic::ppc_vsx_xvcmpgedp_p:
10449  CompareOpc = 115;
10450  break;
10451  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10452  CompareOpc = 107;
10453  break;
10454  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10455  CompareOpc = 67;
10456  break;
10457  case Intrinsic::ppc_vsx_xvcmpgesp_p:
10458  CompareOpc = 83;
10459  break;
10460  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10461  CompareOpc = 75;
10462  break;
10463  }
10464  isDot = true;
10465  } else
10466  return false;
10467  break;
10468 
10469  // Normal Comparisons.
10470  case Intrinsic::ppc_altivec_vcmpbfp:
10471  CompareOpc = 966;
10472  break;
10473  case Intrinsic::ppc_altivec_vcmpeqfp:
10474  CompareOpc = 198;
10475  break;
10476  case Intrinsic::ppc_altivec_vcmpequb:
10477  CompareOpc = 6;
10478  break;
10479  case Intrinsic::ppc_altivec_vcmpequh:
10480  CompareOpc = 70;
10481  break;
10482  case Intrinsic::ppc_altivec_vcmpequw:
10483  CompareOpc = 134;
10484  break;
10485  case Intrinsic::ppc_altivec_vcmpequd:
10486  if (Subtarget.hasP8Altivec())
10487  CompareOpc = 199;
10488  else
10489  return false;
10490  break;
10491  case Intrinsic::ppc_altivec_vcmpneb:
10492  case Intrinsic::ppc_altivec_vcmpneh:
10493  case Intrinsic::ppc_altivec_vcmpnew:
10494  case Intrinsic::ppc_altivec_vcmpnezb:
10495  case Intrinsic::ppc_altivec_vcmpnezh:
10496  case Intrinsic::ppc_altivec_vcmpnezw:
10497  if (Subtarget.hasP9Altivec())
10498  switch (IntrinsicID) {
10499  default:
10500  llvm_unreachable("Unknown comparison intrinsic.");
10501  case Intrinsic::ppc_altivec_vcmpneb:
10502  CompareOpc = 7;
10503  break;
10504  case Intrinsic::ppc_altivec_vcmpneh:
10505  CompareOpc = 71;
10506  break;
10507  case Intrinsic::ppc_altivec_vcmpnew:
10508  CompareOpc = 135;
10509  break;
10510  case Intrinsic::ppc_altivec_vcmpnezb:
10511  CompareOpc = 263;
10512  break;
10513  case Intrinsic::ppc_altivec_vcmpnezh:
10514  CompareOpc = 327;
10515  break;
10516  case Intrinsic::ppc_altivec_vcmpnezw:
10517  CompareOpc = 391;
10518  break;
10519  }
10520  else
10521  return false;
10522  break;
10523  case Intrinsic::ppc_altivec_vcmpgefp:
10524  CompareOpc = 454;
10525  break;
10526  case Intrinsic::ppc_altivec_vcmpgtfp:
10527  CompareOpc = 710;
10528  break;
10529  case Intrinsic::ppc_altivec_vcmpgtsb:
10530  CompareOpc = 774;
10531  break;
10532  case Intrinsic::ppc_altivec_vcmpgtsh:
10533  CompareOpc = 838;
10534  break;
10535  case Intrinsic::ppc_altivec_vcmpgtsw:
10536  CompareOpc = 902;
10537  break;
10538  case Intrinsic::ppc_altivec_vcmpgtsd:
10539  if (Subtarget.hasP8Altivec())
10540  CompareOpc = 967;
10541  else
10542  return false;
10543  break;
10544  case Intrinsic::ppc_altivec_vcmpgtub:
10545  CompareOpc = 518;
10546  break;
10547  case Intrinsic::ppc_altivec_vcmpgtuh:
10548  CompareOpc = 582;
10549  break;
10550  case Intrinsic::ppc_altivec_vcmpgtuw:
10551  CompareOpc = 646;
10552  break;
10553  case Intrinsic::ppc_altivec_vcmpgtud:
10554  if (Subtarget.hasP8Altivec())
10555  CompareOpc = 711;
10556  else
10557  return false;
10558  break;
10559  case Intrinsic::ppc_altivec_vcmpequq_p:
10560  case Intrinsic::ppc_altivec_vcmpgtsq_p:
10561  case Intrinsic::ppc_altivec_vcmpgtuq_p:
10562  if (!Subtarget.isISA3_1())
10563  return false;
10564  switch (IntrinsicID) {
10565  default:
10566  llvm_unreachable("Unknown comparison intrinsic.");
10567  case Intrinsic::ppc_altivec_vcmpequq_p:
10568  CompareOpc = 455;
10569  break;
10570  case Intrinsic::ppc_altivec_vcmpgtsq_p:
10571  CompareOpc = 903;
10572  break;
10573  case Intrinsic::ppc_altivec_vcmpgtuq_p:
10574  CompareOpc = 647;
10575  break;
10576  }
10577  isDot = true;
10578  break;
10579  }
10580  return true;
10581 }
10582 
10583 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10584 /// lower, do it, otherwise return null.
10585 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10586  SelectionDAG &DAG) const {
10587  unsigned IntrinsicID =
10588  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10589 
10590  SDLoc dl(Op);
10591 
  // Intrinsics handled by a case below return directly; anything else falls
  // through to the altivec vector-compare lowering after this switch.
10592  switch (IntrinsicID) {
10593  case Intrinsic::thread_pointer:
10594  // Reads the thread pointer register, used for __builtin_thread_pointer.
10595  if (Subtarget.isPPC64())
10596  return DAG.getRegister(PPC::X13, MVT::i64);
10597  return DAG.getRegister(PPC::R2, MVT::i32);
10598 
10599  case Intrinsic::ppc_mma_disassemble_acc: {
  // On ISA-Future targets the 512-bit accumulator is first split into two
  // 256-bit halves (DMXXEXTFDMR512), and each half is then split into two
  // v16i8 values; which half/index is taken first depends on endianness.
10600  if (Subtarget.isISAFuture()) {
10601  EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10602  SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
10603  ArrayRef(ReturnTypes, 2),
10604  Op.getOperand(1)),
10605  0);
10606  SmallVector<SDValue, 4> RetOps;
10607  SDValue Value = SDValue(WideVec.getNode(), 0);
10608  SDValue Value2 = SDValue(WideVec.getNode(), 1);
10609 
10610  SDValue Extract;
10611  Extract = DAG.getNode(
10613  Subtarget.isLittleEndian() ? Value2 : Value,
10614  DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10615  dl, getPointerTy(DAG.getDataLayout())));
10616  RetOps.push_back(Extract)
10617  Extract = DAG.getNode(
10619  Subtarget.isLittleEndian() ? Value2 : Value,
10620  DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10621  dl, getPointerTy(DAG.getDataLayout())));
10622  RetOps.push_back(Extract);
10623  Extract = DAG.getNode(
10625  Subtarget.isLittleEndian() ? Value : Value2,
10626  DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10627  dl, getPointerTy(DAG.getDataLayout())));
10628  RetOps.push_back(Extract);
10629  Extract = DAG.getNode(
10631  Subtarget.isLittleEndian() ? Value : Value2,
10632  DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10633  dl, getPointerTy(DAG.getDataLayout())));
10634  RetOps.push_back(Extract);
10635  return DAG.getMergeValues(RetOps, dl);
10636  }
10638  }
  // Note: on non-ISA-Future targets, ppc_mma_disassemble_acc falls through
  // into this case; the IntrinsicID check below distinguishes the two.
10639  case Intrinsic::ppc_vsx_disassemble_pair: {
10640  int NumVecs = 2;
10641  SDValue WideVec = Op.getOperand(1);
10642  if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10643  NumVecs = 4;
10644  WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10645  }
  // Extract each 128-bit vector register; reverse the order on LE targets.
10646  SmallVector<SDValue, 4> RetOps;
10647  for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10648  SDValue Extract = DAG.getNode(
10649  PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10650  DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10651  : VecNo,
10652  dl, getPointerTy(DAG.getDataLayout())));
10653  RetOps.push_back(Extract);
10654  }
10655  return DAG.getMergeValues(RetOps, dl);
10656  }
10657 
10658  case Intrinsic::ppc_unpack_longdouble: {
10659  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10660  assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10661  "Argument of long double unpack must be 0 or 1!");
  // Extract the requested f64 half of the ppcf128 value; !! normalizes the
  // index to exactly 0 or 1.
10662  return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10663  DAG.getConstant(!!(Idx->getSExtValue()), dl,
10664  Idx->getValueType(0)));
10665  }
10666 
10667  case Intrinsic::ppc_compare_exp_lt:
10668  case Intrinsic::ppc_compare_exp_gt:
10669  case Intrinsic::ppc_compare_exp_eq:
10670  case Intrinsic::ppc_compare_exp_uo: {
10671  unsigned Pred;
10672  switch (IntrinsicID) {
10673  case Intrinsic::ppc_compare_exp_lt:
10674  Pred = PPC::PRED_LT;
10675  break;
10676  case Intrinsic::ppc_compare_exp_gt:
10677  Pred = PPC::PRED_GT;
10678  break;
10679  case Intrinsic::ppc_compare_exp_eq:
10680  Pred = PPC::PRED_EQ;
10681  break;
10682  case Intrinsic::ppc_compare_exp_uo:
10683  Pred = PPC::PRED_UN;
10684  break;
10685  }
  // Compare the exponents with XSCMPEXPDP, then select 1 or 0 based on the
  // requested predicate bit of the comparison result.
10686  return SDValue(
10687  DAG.getMachineNode(
10688  PPC::SELECT_CC_I4, dl, MVT::i32,
10689  {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10690  Op.getOperand(1), Op.getOperand(2)),
10691  0),
10692  DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10693  DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10694  0);
10695  }
10696  case Intrinsic::ppc_test_data_class: {
  // Pick the test-data-class instruction that matches the floating-point
  // type of the value operand, then materialize the EQ bit as 0/1.
10697  EVT OpVT = Op.getOperand(1).getValueType();
10698  unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
10699  : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
10700  : PPC::XSTSTDCSP);
10701  return SDValue(
10702  DAG.getMachineNode(
10703  PPC::SELECT_CC_I4, dl, MVT::i32,
10704  {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10705  Op.getOperand(1)),
10706  0),
10707  DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10708  DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10709  0);
10710  }
10711  case Intrinsic::ppc_fnmsub: {
10712  EVT VT = Op.getOperand(1).getValueType();
  // Without VSX (or without f128 support for f128 operands) expand to the
  // equivalent fneg(fma(a, b, fneg(c))) form; otherwise emit FNMSUB.
10713  if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10714  return DAG.getNode(
10715  ISD::FNEG, dl, VT,
10716  DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10717  DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
10718  return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
10719  Op.getOperand(2), Op.getOperand(3));
10720  }
10721  case Intrinsic::ppc_convert_f128_to_ppcf128:
10722  case Intrinsic::ppc_convert_ppcf128_to_f128: {
  // These conversions are performed through compiler-rt library calls.
10723  RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10724  ? RTLIB::CONVERT_PPCF128_F128
10725  : RTLIB::CONVERT_F128_PPCF128;
10726  MakeLibCallOptions CallOptions;
10727  std::pair<SDValue, SDValue> Result =
10728  makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
10729  dl, SDValue());
10730  return Result.first;
10731  }
10732  case Intrinsic::ppc_maxfe:
10733  case Intrinsic::ppc_maxfl:
10734  case Intrinsic::ppc_maxfs:
10735  case Intrinsic::ppc_minfe:
10736  case Intrinsic::ppc_minfl:
10737  case Intrinsic::ppc_minfs: {
10738  EVT VT = Op.getValueType();
10739  assert(
10740  all_of(Op->ops().drop_front(4),
10741  [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
10742  "ppc_[max|min]f[e|l|s] must have uniform type arguments");
  // VT is only consumed by the assert above.
10743  (void)VT;
10745  if (IntrinsicID == Intrinsic::ppc_minfe ||
10746  IntrinsicID == Intrinsic::ppc_minfl ||
10747  IntrinsicID == Intrinsic::ppc_minfs)
10748  CC = ISD::SETLT;
  // Fold the variadic operand list pairwise with select_cc to compute the
  // overall min/max.
10749  unsigned I = Op.getNumOperands() - 2, Cnt = I;
10750  SDValue Res = Op.getOperand(I);
10751  for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
10752  Res =
10753  DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
10754  }
10755  return Res;
10756  }
10757  }
10758 
10759  // If this is a lowered altivec predicate compare, CompareOpc is set to the
10760  // opcode number of the comparison.
10761  int CompareOpc;
10762  bool isDot;
10763  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10764  return SDValue(); // Don't custom lower most intrinsics.
10765 
10766  // If this is a non-dot comparison, make the VCMP node and we are done.
10767  if (!isDot) {
10768  SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10769  Op.getOperand(1), Op.getOperand(2),
10770  DAG.getConstant(CompareOpc, dl, MVT::i32));
10771  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10772  }
10773 
10774  // Create the PPCISD altivec 'dot' comparison node.
10775  SDValue Ops[] = {
10776  Op.getOperand(2), // LHS
10777  Op.getOperand(3), // RHS
10778  DAG.getConstant(CompareOpc, dl, MVT::i32)
10779  };
10780  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10781  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10782 
10783  // Now that we have the comparison, emit a copy from the CR to a GPR.
10784  // This is flagged to the above dot comparison.
10786  DAG.getRegister(PPC::CR6, MVT::i32),
10787  CompNode.getValue(1));
10788 
10789  // Unpack the result based on how the target uses it.
10790  unsigned BitNo; // Bit # of CR6.
10791  bool InvertBit; // Invert result?
10792  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10793  default: // Can't happen, don't crash on invalid number though.
10794  case 0: // Return the value of the EQ bit of CR6.
10795  BitNo = 0; InvertBit = false;
10796  break;
10797  case 1: // Return the inverted value of the EQ bit of CR6.
10798  BitNo = 0; InvertBit = true;
10799  break;
10800  case 2: // Return the value of the LT bit of CR6.
10801  BitNo = 2; InvertBit = false;
10802  break;
10803  case 3: // Return the inverted value of the LT bit of CR6.
10804  BitNo = 2; InvertBit = true;
10805  break;
10806  }
10807 
10808  // Shift the bit into the low position.
10809  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10810  DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10811  // Isolate the bit.
10812  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10813  DAG.getConstant(1, dl, MVT::i32));
10814 
10815  // If we are supposed to, toggle the bit.
10816  if (InvertBit)
10817  Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10818  DAG.getConstant(1, dl, MVT::i32));
10819  return Flags;
10820 }
10821 
10822 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10823  SelectionDAG &DAG) const {
10824  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10825  // the beginning of the argument list.
10826  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10827  SDLoc DL(Op);
10828  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10829  case Intrinsic::ppc_cfence: {
10830  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10831  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10832  SDValue Val = Op.getOperand(ArgStart + 1);
10833  EVT Ty = Val.getValueType();
10834  if (Ty == MVT::i128) {
10835  // FIXME: Testing one of two paired registers is sufficient to guarantee
10836  // ordering?
10837  Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
10838  }
10839  return SDValue(
10840  DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10841  DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
10842  Op.getOperand(0)),
10843  0);
10844  }
10845  default:
10846  break;
10847  }
10848  return SDValue();
10849 }
10850 
10851 // Lower scalar BSWAP64 to xxbrd.
10852 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10853  SDLoc dl(Op);
  // Only custom lower on 64-bit targets; otherwise keep default handling.
10854  if (!Subtarget.isPPC64())
10855  return Op;
  // Splat the scalar into both doublewords of a v2i64.
10856  // MTVSRDD
10857  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10858  Op.getOperand(0));
  // Byte-swap the whole vector in one instruction.
10859  // XXBRD
10860  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
  // Move the swapped doubleword back to a GPR; the element to read differs
  // by endianness.
10861  // MFVSRD
10862  int VectorIndex = 0;
10863  if (Subtarget.isLittleEndian())
10864  VectorIndex = 1;
10866  DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10867  return Op;
10868 }
10869 
10870 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10871 // compared to a value that is atomically loaded (atomic loads zero-extend).
10872 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10873  SelectionDAG &DAG) const {
10874  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10875  "Expecting an atomic compare-and-swap here.");
10876  SDLoc dl(Op);
10877  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10878  EVT MemVT = AtomicNode->getMemoryVT();
  // Word and doubleword compare-and-swap need no adjustment.
10879  if (MemVT.getSizeInBits() >= 32)
10880  return Op;
10881 
  // Operand 2 is the expected (compare) value.
10882  SDValue CmpOp = Op.getOperand(2);
10883  // If this is already correctly zero-extended, leave it alone.
10884  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10885  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10886  return Op;
10887 
10888  // Clear the high bits of the compare operand.
10889  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10890  SDValue NewCmpOp =
10891  DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10892  DAG.getConstant(MaskVal, dl, MVT::i32));
10893 
10894  // Replace the existing compare operand with the properly zero-extended one.
10896  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10897  Ops.push_back(AtomicNode->getOperand(i));
10898  Ops[2] = NewCmpOp;
  // Rebuild the node with the same memory operand so alias info and
  // atomicity are preserved.
10899  MachineMemOperand *MMO = AtomicNode->getMemOperand();
10900  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10901  auto NodeTy =
10903  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10904 }
10905 
// Lower quadword (i128) atomic load/store to the corresponding
// int_ppc_atomic_{load,store}_i128 intrinsic, which instruction selection
// pattern-matches into PPC instruction sequences.
10906 SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
10907  SelectionDAG &DAG) const {
10908  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
10909  EVT MemVT = N->getMemoryVT();
10910  assert(MemVT.getSimpleVT() == MVT::i128 &&
10911  "Expect quadword atomic operations");
10912  SDLoc dl(N);
10913  unsigned Opc = N->getOpcode();
10914  switch (Opc) {
10915  case ISD::ATOMIC_LOAD: {
10916  // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
10917  // lowered to ppc instructions by pattern matching instruction selector.
10920  N->getOperand(0),
10921  DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
10922  for (int I = 1, E = N->getNumOperands(); I < E; ++I)
10923  Ops.push_back(N->getOperand(I));
10924  SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
10925  Ops, MemVT, N->getMemOperand());
  // Reassemble the i128 from the two 64-bit halves returned by the
  // intrinsic: zero-extend both, shift the high half up, then OR.
10926  SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
10927  SDValue ValHi =
10928  DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
10929  ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
10930  DAG.getConstant(64, dl, MVT::i32));
10931  SDValue Val =
10932  DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
10933  return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
10934  {Val, LoadedVal.getValue(2)});
10935  }
10936  case ISD::ATOMIC_STORE: {
10937  // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
10938  // lowered to ppc instructions by pattern matching instruction selector.
10939  SDVTList Tys = DAG.getVTList(MVT::Other);
10941  N->getOperand(0),
10942  DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
  // Split the i128 value into low/high 64-bit halves for the intrinsic.
10943  SDValue Val = N->getOperand(2);
10944  SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
10945  SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
10946  DAG.getConstant(64, dl, MVT::i32));
10947  ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
10948  Ops.push_back(ValLo);
10949  Ops.push_back(ValHi);
10950  Ops.push_back(N->getOperand(1));
10951  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
10952  N->getMemOperand());
10953  }
10954  default:
10955  llvm_unreachable("Unexpected atomic opcode");
10956  }
10957 }
10958 
// Lower SCALAR_TO_VECTOR by spilling the scalar to an aligned stack slot and
// reloading it as a vector.
10959 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10960  SelectionDAG &DAG) const {
10961  SDLoc dl(Op);
10962  // Create a stack slot that is 16-byte aligned.
10964  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10965  EVT PtrVT = getPointerTy(DAG.getDataLayout());
10966  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10967 
10968  // Store the input value into Value#0 of the stack slot.
10969  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10970  MachinePointerInfo());
  // Load it out (chained on the store so the spill happens first).
10971  // Load it out.
10972  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10973 }
10974 
10975 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10976  SelectionDAG &DAG) const {
10977  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10978  "Should only be called for ISD::INSERT_VECTOR_ELT");
10979 
10980  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10981 
10982  EVT VT = Op.getValueType();
10983  SDLoc dl(Op);
10984  SDValue V1 = Op.getOperand(0);
10985  SDValue V2 = Op.getOperand(1);
10986 
10987  if (VT == MVT::v2f64 && C)
10988  return Op;
10989 
10990  if (Subtarget.hasP9Vector()) {
10991  // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
10992  // because on P10, it allows this specific insert_vector_elt load pattern to
10993  // utilize the refactored load and store infrastructure in order to exploit
10994  // prefixed loads.
10995  // On targets with inexpensive direct moves (Power9 and up), a
10996  // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
10997  // load since a single precision load will involve conversion to double
10998  // precision on the load followed by another conversion to single precision.
10999  if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11000  (isa<LoadSDNode>(V2))) {
11001  SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11002  SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11003  SDValue InsVecElt =
11004  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11005  BitcastLoad, Op.getOperand(2));
11006  return DAG.getBitcast(MVT::v4f32, InsVecElt);
11007  }
11008  }
11009 
11010  if (Subtarget.isISA3_1()) {
11011  if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11012  return SDValue();
11013  // On P10, we have legal lowering for constant and variable indices for
11014  // all vectors.
11015  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11016  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11017  return Op;
11018  }
11019 
11020  // Before P10, we have legal lowering for constant indices but not for
11021  // variable ones.
11022  if (!C)
11023  return SDValue();
11024 
11025  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11026  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11027  SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11028  unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11029  unsigned InsertAtElement = C->getZExtValue();
11030  unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11031  if (Subtarget.isLittleEndian()) {
11032  InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11033  }
11034  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11035  DAG.getConstant(InsertAtByte, dl, MVT::i32));
11036  }
11037  return Op;
11038 }
11039 
// Lower loads of the MMA/paired-vector types v256i1 and v512i1 by splitting
// them into multiple v16i8 loads and rebuilding the wide value.
11040 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11041  SelectionDAG &DAG) const {
11042  SDLoc dl(Op);
11043  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11044  SDValue LoadChain = LN->getChain();
11045  SDValue BasePtr = LN->getBasePtr();
11046  EVT VT = Op.getValueType();
11047 
  // Ordinary vector loads keep default handling.
11048  if (VT != MVT::v256i1 && VT != MVT::v512i1)
11049  return Op;
11050 
11051  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11052  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11053  // 2 or 4 vsx registers.
11054  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11055  "Type unsupported without MMA");
11056  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11057  "Type unsupported without paired vector support");
11058  Align Alignment = LN->getAlign();
11060  SmallVector<SDValue, 4> LoadChains;
11061  unsigned NumVecs = VT.getSizeInBits() / 128;
  // Emit one 16-byte load per underlying register, advancing the pointer by
  // 16 each iteration and tracking every load's output chain.
11062  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11063  SDValue Load =
11064  DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11065  LN->getPointerInfo().getWithOffset(Idx * 16),
11066  commonAlignment(Alignment, Idx * 16),
11067  LN->getMemOperand()->getFlags(), LN->getAAInfo());
11068  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11069  DAG.getConstant(16, dl, BasePtr.getValueType()));
11070  Loads.push_back(Load);
11071  LoadChains.push_back(Load.getValue(1));
11072  }
  // Register order within the wide value is reversed on little-endian.
11073  if (Subtarget.isLittleEndian()) {
11074  std::reverse(Loads.begin(), Loads.end());
11075  std::reverse(LoadChains.begin(), LoadChains.end());
11076  }
11077  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11078  SDValue Value =
11080  dl, VT, Loads);
11081  SDValue RetOps[] = {Value, TF};
11082  return DAG.getMergeValues(RetOps, dl);
11083 }
11084 
// Lower stores of the MMA/paired-vector types v256i1 and v512i1 by splitting
// them into multiple v16i8 stores, one per underlying register.
11085 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11086  SelectionDAG &DAG) const {
11087  SDLoc dl(Op);
11088  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11089  SDValue StoreChain = SN->getChain();
11090  SDValue BasePtr = SN->getBasePtr();
11091  SDValue Value = SN->getValue();
11092  SDValue Value2 = SN->getValue();
11093  EVT StoreVT = Value.getValueType();
11094 
  // Ordinary vector stores keep default handling.
11095  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11096  return Op;
11097 
11098  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11099  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11100  // underlying registers individually.
11101  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11102  "Type unsupported without MMA");
11103  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11104  "Type unsupported without paired vector support");
11105  Align Alignment = SN->getAlign();
11106  SmallVector<SDValue, 4> Stores;
11107  unsigned NumVecs = 2;
11108  if (StoreVT == MVT::v512i1) {
  // On ISA-Future, split the 512-bit accumulator into two 256-bit halves
  // first (DMXXEXTFDMR512); Value/Value2 then hold the two halves.
11109  if (Subtarget.isISAFuture()) {
11110  EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11111  MachineSDNode *ExtNode = DAG.getMachineNode(
11112  PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1));
11113 
11114  Value = SDValue(ExtNode, 0);
11115  Value2 = SDValue(ExtNode, 1);
11116  } else
11118  NumVecs = 4;
11119  }
  // Emit one 16-byte store per register; LE targets store the registers in
  // reversed order.
11120  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11121  unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11122  SDValue Elt;
11123  if (Subtarget.isISAFuture()) {
11124  VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11126  Idx > 1 ? Value2 : Value,
11127  DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11128  } else
11130  DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11131 
11132  SDValue Store =
11133  DAG.getStore(StoreChain, dl, Elt, BasePtr,
11134  SN->getPointerInfo().getWithOffset(Idx * 16),
11135  commonAlignment(Alignment, Idx * 16),
11136  SN->getMemOperand()->getFlags(), SN->getAAInfo());
11137  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11138  DAG.getConstant(16, dl, BasePtr.getValueType()));
11139  Stores.push_back(Store);
11140  }
11141  SDValue TF = DAG.getTokenFactor(dl, Stores);
11142  return TF;
11143 }
11144 
11145 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11146  SDLoc dl(Op);
11147  if (Op.getValueType() == MVT::v4i32) {
11148  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11149 
11150  SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11151  // +16 as shift amt.
11152  SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11153  SDValue RHSSwap = // = vrlw RHS, 16
11154  BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11155 
11156  // Shrinkify inputs to v8i16.
11157  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11158  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11159  RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11160 
11161  // Low parts multiplied together, generating 32-bit results (we ignore the
11162  // top parts).
11163  SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11164  LHS, RHS, DAG, dl, MVT::v4i32);
11165 
11166  SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11167  LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11168  // Shift the high parts up 16 bits.
11169  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11170  Neg16, DAG, dl);
11171  return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11172  } else if (Op.getValueType() == MVT::v16i8) {
11173  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11174  bool isLittleEndian = Subtarget.isLittleEndian();
11175 
11176  // Multiply the even 8-bit parts, producing 16-bit sums.
11177  SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11178  LHS, RHS, DAG, dl, MVT::v8i16);
11179  EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11180 
11181  // Multiply the odd 8-bit parts, producing 16-bit sums.
11182  SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11183  LHS, RHS, DAG, dl, MVT::v8i16);
11184  OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11185 
11186  // Merge the results together. Because vmuleub and vmuloub are
11187  // instructions with a big-endian bias, we must reverse the
11188  // element numbering and reverse the meaning of "odd" and "even"
11189  // when generating little endian code.
11190  int Ops[16];
11191  for (unsigned i = 0; i != 8; ++i) {
11192  if (isLittleEndian) {
11193  Ops[i*2 ] = 2*i;
11194  Ops[i*2+1] = 2*i+16;
11195  } else {
11196  Ops[i*2 ] = 2*i+1;
11197  Ops[i*2+1] = 2*i+1+16;
11198  }
11199  }
11200  if (isLittleEndian)
11201  return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11202  else
11203  return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11204  } else {
11205  llvm_unreachable("Unknown mul to lower!");
11206  }
11207 }
11208 
11209 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11210  bool IsStrict = Op->isStrictFPOpcode();
11211  if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11212  !Subtarget.hasP9Vector())
11213  return SDValue();
11214 
11215  return Op;
11216 }
11217 
11218 // Custom lowering for fpext vf32 to v2f64
11219 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11220 
11221  assert(Op.getOpcode() == ISD::FP_EXTEND &&
11222  "Should only be called for ISD::FP_EXTEND");
11223 
11224  // FIXME: handle extends from half precision float vectors on P9.
11225  // We only want to custom lower an extend from v2f32 to v2f64.
11226  if (Op.getValueType() != MVT::v2f64 ||
11227  Op.getOperand(0).getValueType() != MVT::v2f32)
11228  return SDValue();
11229 
11230  SDLoc dl(Op);
11231  SDValue Op0 = Op.getOperand(0);
11232 
  // Dispatch on how the v2f32 source was produced; each case emits a
  // FP_EXTEND_HALF over a v4f32 value.
11233  switch (Op0.getOpcode()) {
11234  default:
11235  return SDValue();
11236  case ISD::EXTRACT_SUBVECTOR: {
11237  assert(Op0.getNumOperands() == 2 &&
11238  isa<ConstantSDNode>(Op0->getOperand(1)) &&
11239  "Node should have 2 operands with second one being a constant!");
11240 
11241  if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11242  return SDValue();
11243 
11244  // Custom lower is only done for high or low doubleword.
11245  int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
11246  if (Idx % 2 != 0)
11247  return SDValue();
11248 
11249  // Since input is v4f32, at this point Idx is either 0 or 2.
11250  // Shift to get the doubleword position we want.
11251  int DWord = Idx >> 1;
11252 
11253  // High and low word positions are different on little endian.
11254  if (Subtarget.isLittleEndian())
11255  DWord ^= 0x1;
11256 
11257  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11258  Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11259  }
11260  case ISD::FADD:
11261  case ISD::FMUL:
11262  case ISD::FSUB: {
  // If both operands of the arithmetic op are loads, re-emit the loads as
  // memory-intrinsic nodes and extend the recomputed result.
11263  SDValue NewLoad[2];
11264  for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11265  // Ensure both input are loads.
11266  SDValue LdOp = Op0.getOperand(i);
11267  if (LdOp.getOpcode() != ISD::LOAD)
11268  return SDValue();
11269  // Generate new load node.
11270  LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11271  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11272  NewLoad[i] = DAG.getMemIntrinsicNode(
11274  LD->getMemoryVT(), LD->getMemOperand());
11275  }
11276  SDValue NewOp =
11277  DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11278  NewLoad[1], Op0.getNode()->getFlags());
11279  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11280  DAG.getConstant(0, dl, MVT::i32));
11281  }
11282  case ISD::LOAD: {
11283  LoadSDNode *LD = cast<LoadSDNode>(Op0);
11284  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11285  SDValue NewLd = DAG.getMemIntrinsicNode(
11287  LD->getMemoryVT(), LD->getMemOperand());
11288  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11289  DAG.getConstant(0, dl, MVT::i32));
11290  }
11291  }
11292  llvm_unreachable("ERROR:Should return for all cases within swtich.");
11293 }
11294 
11295 /// LowerOperation - Provide custom lowering hooks for some operations.
11296 ///
  // Dispatches each custom-lowered opcode to its dedicated Lower* helper.
11298  switch (Op.getOpcode()) {
11299  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11300  case ISD::FPOW: return lowerPow(Op, DAG);
11301  case ISD::FSIN: return lowerSin(Op, DAG);
11302  case ISD::FCOS: return lowerCos(Op, DAG);
11303  case ISD::FLOG: return lowerLog(Op, DAG);
11304  case ISD::FLOG10: return lowerLog10(Op, DAG);
11305  case ISD::FEXP: return lowerExp(Op, DAG);
11306  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11307  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11308  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11309  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11310  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11311  case ISD::STRICT_FSETCC:
11312  case ISD::STRICT_FSETCCS:
11313  case ISD::SETCC: return LowerSETCC(Op, DAG);
11314  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11315  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11316 
11317  case ISD::INLINEASM:
11318  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11319  // Variable argument lowering.
11320  case ISD::VASTART: return LowerVASTART(Op, DAG);
11321  case ISD::VAARG: return LowerVAARG(Op, DAG);
11322  case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11323 
11324  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11325  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11327  return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11328 
11329  // Exception handling lowering.
11330  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11331  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11332  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11333 
11334  case ISD::LOAD: return LowerLOAD(Op, DAG);
11335  case ISD::STORE: return LowerSTORE(Op, DAG);
11336  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11337  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11340  case ISD::FP_TO_UINT:
11341  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11344  case ISD::UINT_TO_FP:
11345  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11346  case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11347 
11348  // Lower 64-bit shifts.
11349  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11350  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11351  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11352 
11353  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11354  case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11355 
11356  // Vector-related lowering.
11357  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11358  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11359  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11360  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11361  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11362  case ISD::MUL: return LowerMUL(Op, DAG);
11363  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11364  case ISD::STRICT_FP_ROUND:
11365  case ISD::FP_ROUND:
11366  return LowerFP_ROUND(Op, DAG);
11367  case ISD::ROTL: return LowerROTL(Op, DAG);
11368 
11369  // For counter-based loop handling.
11370  case ISD::INTRINSIC_W_CHAIN: return SDValue();
11371 
11372  case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11373 
11374  // Frame & Return address.
11375  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11376  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11377 
11378  case ISD::INTRINSIC_VOID:
11379  return LowerINTRINSIC_VOID(Op, DAG);
11380  case ISD::BSWAP:
11381  return LowerBSWAP(Op, DAG);
11382  case ISD::ATOMIC_CMP_SWAP:
11383  return LowerATOMIC_CMP_SWAP(Op, DAG);
11384  case ISD::ATOMIC_STORE:
11385  return LowerATOMIC_LOAD_STORE(Op, DAG);
11386  }
11387 }
11388 
// Custom type-legalization hook: replace the illegal-typed results of node N
// with legal-typed equivalents pushed onto Results.
// NOTE(review): the opening signature line and a few interior lines (the
// SDVTList declaration used by READ_TIME_BASE, and what look like the
// STRICT_FP_TO_SINT/UINT case labels before FP_TO_SINT) are missing from this
// extraction — confirm against the upstream file before editing.
 11391  SelectionDAG &DAG) const {
 11392  SDLoc dl(N);
 11393  switch (N->getOpcode()) {
 11394  default:
 11395  llvm_unreachable("Do not know how to custom type legalize this operation!");
 11396  case ISD::ATOMIC_LOAD: {
 // Expanded through the shared atomic load/store lowering; result 0 is the
 // loaded value, result 1 is the output chain.
 11397  SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
 11398  Results.push_back(Res);
 11399  Results.push_back(Res.getValue(1));
 11400  break;
 11401  }
 11402  case ISD::READCYCLECOUNTER: {
 // READ_TIME_BASE produces two 32-bit halves plus a chain; build the i64
 // cycle-counter result from the pair of halves.
 11404  SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
 11405 
 11406  Results.push_back(
 11407  DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
 11408  Results.push_back(RTB.getValue(2));
 11409  break;
 11410  }
 11411  case ISD::INTRINSIC_W_CHAIN: {
 // Only the counter-based loop decrement intrinsic needs work here.
 11412  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
 11413  Intrinsic::loop_decrement)
 11414  break;
 11415 
 11416  assert(N->getValueType(0) == MVT::i1 &&
 11417  "Unexpected result type for CTR decrement intrinsic");
 // Re-emit the intrinsic with the target's legal setcc result type, then
 // truncate back down to the original i1 result.
 11418  EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
 11419  N->getValueType(0));
 11420  SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
 11421  SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
 11422  N->getOperand(1));
 11423 
 11424  Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
 11425  Results.push_back(NewInt.getValue(1));
 11426  break;
 11427  }
 11428  case ISD::INTRINSIC_WO_CHAIN: {
 11429  switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
 11430  case Intrinsic::ppc_pack_longdouble:
 // ppcf128 is a pair of doubles; note the intrinsic's operands are passed
 // to BUILD_PAIR in swapped order (operand 2 first).
 11431  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
 11432  N->getOperand(2), N->getOperand(1)));
 11433  break;
 11434  case Intrinsic::ppc_maxfe:
 11435  case Intrinsic::ppc_minfe:
 11436  case Intrinsic::ppc_fnmsub:
 11437  case Intrinsic::ppc_convert_f128_to_ppcf128:
 11438  Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
 11439  break;
 11440  }
 11441  break;
 11442  }
 11443  case ISD::VAARG: {
 // Custom VAARG legalization is only performed for 32-bit SVR4.
 11444  if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
 11445  return;
 11446 
 11447  EVT VT = N->getValueType(0);
 11448 
 11449  if (VT == MVT::i64) {
 11450  SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
 11451 
 11452  Results.push_back(NewNode);
 11453  Results.push_back(NewNode.getValue(1));
 11454  }
 11455  return;
 11456  }
 11459  case ISD::FP_TO_SINT:
 11460  case ISD::FP_TO_UINT: {
 11461  // LowerFP_TO_INT() can only handle f32 and f64.
 11462  if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
 11463  MVT::ppcf128)
 11464  return;
 11465  SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
 11466  Results.push_back(LoweredValue);
 // Strict FP opcodes carry an extra chain result that must be preserved.
 11467  if (N->isStrictFPOpcode())
 11468  Results.push_back(LoweredValue.getValue(1));
 11469  return;
 11470  }
 11471  case ISD::TRUNCATE: {
 // Only vector truncates need custom handling here.
 11472  if (!N->getValueType(0).isVector())
 11473  return;
 11474  SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
 11475  if (Lowered)
 11476  Results.push_back(Lowered);
 11477  return;
 11478  }
 11479  case ISD::FSHL:
 11480  case ISD::FSHR:
 11481  // Don't handle funnel shifts here.
 11482  return;
 11483  case ISD::BITCAST:
 11484  // Don't handle bitcast here.
 11485  return;
 11486  case ISD::FP_EXTEND:
 11487  SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
 11488  if (Lowered)
 11489  Results.push_back(Lowered);
 11490  return;
 11491  }
 11492 }
11493 
11494 //===----------------------------------------------------------------------===//
11495 // Other Lowering Code
11496 //===----------------------------------------------------------------------===//
11497 
 // Emit a call to a no-argument intrinsic at the builder's current insertion
 // point, returning the generated call instruction.
 // NOTE(review): the signature line (static helper taking an IRBuilderBase
 // and an intrinsic ID) and the line declaring Func are missing from this
 // extraction — confirm against the upstream file.
 11499  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
 11501  return Builder.CreateCall(Func, {});
 11502 }
11503 
 11504 // The mappings for emitLeading/TrailingFence are taken from
 11505 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
 // Emit the fence required *before* an atomic operation: a heavyweight sync
 // for the strongest ordering, an lwsync for release-or-stronger, nothing
 // otherwise.
 // NOTE(review): the signature line and the condition guarding the ppc_sync
 // call (presumably a SequentiallyConsistent check) are missing from this
 // extraction — confirm against the upstream file.
 11507  Instruction *Inst,
 11508  AtomicOrdering Ord) const {
 11510  return callIntrinsic(Builder, Intrinsic::ppc_sync);
 11511  if (isReleaseOrStronger(Ord))
 11512  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
 11513  return nullptr;
 11514 }
11515 
 // Emit the fence required *after* an atomic operation: for acquire-or-
 // stronger orderings with an atomic load, a 64-bit load can use the lighter
 // ppc_cfence (compare + isync) pattern; otherwise fall back to lwsync.
 // NOTE(review): the signature line and the Intrinsic::getDeclaration call
 // inside CreateCall are missing from this extraction.
 11517  Instruction *Inst,
 11518  AtomicOrdering Ord) const {
 11519  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
 11520  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
 11521  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
 11522  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
 11523  if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
 11524  return Builder.CreateCall(
 11526  Builder.GetInsertBlock()->getParent()->getParent(),
 11527  Intrinsic::ppc_cfence, {Inst->getType()}),
 11528  {Inst});
 11529  // FIXME: Can use isync for rmw operation.
 11530  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
 11531  }
 11532  return nullptr;
 11533 }
11534 
 // Expand a pseudo atomic read-modify-write (or swap when BinOpcode==0, or
 // min/max when CmpOpcode!=0) into a load-reserve / store-conditional loop,
 // selecting l[bhwd]arx / st[bhwd]cx. by AtomicSize.
 // NOTE(review): the opening signature lines (MachineInstr &MI,
 // MachineBasicBlock *BB) and the call that transfers BB's successors to
 // exitMBB after the splice are missing from this extraction — confirm
 // against the upstream file.
 11537  unsigned AtomicSize,
 11538  unsigned BinOpcode,
 11539  unsigned CmpOpcode,
 11540  unsigned CmpPred) const {
 11541  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
 11542  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
 11543 
 11544  auto LoadMnemonic = PPC::LDARX;
 11545  auto StoreMnemonic = PPC::STDCX;
 11546  switch (AtomicSize) {
 11547  default:
 11548  llvm_unreachable("Unexpected size of atomic entity");
 11549  case 1:
 11550  LoadMnemonic = PPC::LBARX;
 11551  StoreMnemonic = PPC::STBCX;
 // NOTE(review): the assert message reads "size >=4" but these are the
 // sub-word (size < 4) cases that require partword atomics; the message
 // appears inverted relative to the condition — confirm upstream.
 11552  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
 11553  break;
 11554  case 2:
 11555  LoadMnemonic = PPC::LHARX;
 11556  StoreMnemonic = PPC::STHCX;
 11557  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
 11558  break;
 11559  case 4:
 11560  LoadMnemonic = PPC::LWARX;
 11561  StoreMnemonic = PPC::STWCX;
 11562  break;
 11563  case 8:
 11564  LoadMnemonic = PPC::LDARX;
 11565  StoreMnemonic = PPC::STDCX;
 11566  break;
 11567  }
 11568 
 11569  const BasicBlock *LLVM_BB = BB->getBasicBlock();
 11570  MachineFunction *F = BB->getParent();
 11571  MachineFunction::iterator It = ++BB->getIterator();
 11572 
 11573  Register dest = MI.getOperand(0).getReg();
 11574  Register ptrA = MI.getOperand(1).getReg();
 11575  Register ptrB = MI.getOperand(2).getReg();
 11576  Register incr = MI.getOperand(3).getReg();
 11577  DebugLoc dl = MI.getDebugLoc();
 11578 
 // loop2MBB is only needed for the compare-and-branch (min/max) form.
 11579  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
 11580  MachineBasicBlock *loop2MBB =
 11581  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
 11582  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
 11583  F->insert(It, loopMBB);
 11584  if (CmpOpcode)
 11585  F->insert(It, loop2MBB);
 11586  F->insert(It, exitMBB);
 // Everything after MI moves into exitMBB.
 11587  exitMBB->splice(exitMBB->begin(), BB,
 11588  std::next(MachineBasicBlock::iterator(MI)), BB->end());
 11590 
 11591  MachineRegisterInfo &RegInfo = F->getRegInfo();
 // For plain swap (no BinOpcode) the incoming value is stored directly.
 11592  Register TmpReg = (!BinOpcode) ? incr :
 11593  RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
 11594  : &PPC::GPRCRegClass);
 11595 
 11596  // thisMBB:
 11597  // ...
 11598  // fallthrough --> loopMBB
 11599  BB->addSuccessor(loopMBB);
 11600 
 11601  // loopMBB:
 11602  // l[wd]arx dest, ptr
 11603  // add r0, dest, incr
 11604  // st[wd]cx. r0, ptr
 11605  // bne- loopMBB
 11606  // fallthrough --> exitMBB
 11607 
 11608  // For max/min...
 11609  // loopMBB:
 11610  // l[wd]arx dest, ptr
 11611  // cmpl?[wd] dest, incr
 11612  // bgt exitMBB
 11613  // loop2MBB:
 11614  // st[wd]cx. dest, ptr
 11615  // bne- loopMBB
 11616  // fallthrough --> exitMBB
 11617 
 11618  BB = loopMBB;
 11619  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
 11620  .addReg(ptrA).addReg(ptrB);
 11621  if (BinOpcode)
 11622  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
 11623  if (CmpOpcode) {
 11624  Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
 11625  // Signed comparisons of byte or halfword values must be sign-extended.
 11626  if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
 11627  Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
 11628  BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
 11629  ExtReg).addReg(dest);
 11630  BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
 11631  } else
 11632  BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
 11633 
 // If the comparison says the stored value should be kept, skip the store.
 11634  BuildMI(BB, dl, TII->get(PPC::BCC))
 11635  .addImm(CmpPred)
 11636  .addReg(CrReg)
 11637  .addMBB(exitMBB);
 11638  BB->addSuccessor(loop2MBB);
 11639  BB->addSuccessor(exitMBB);
 11640  BB = loop2MBB;
 11641  }
 // The store-conditional sets CR0; retry the loop on failure (bne-).
 11642  BuildMI(BB, dl, TII->get(StoreMnemonic))
 11643  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
 11644  BuildMI(BB, dl, TII->get(PPC::BCC))
 11645  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
 11646  BB->addSuccessor(loopMBB);
 11647  BB->addSuccessor(exitMBB);
 11648 
 11649  // exitMBB:
 11650  // ...
 11651  BB = exitMBB;
 11652  return BB;
 11653 }
11654 
 // Return true if the value defined by MI is known to be sign-extended:
 // sign-extending loads (lha/lwa families, prefixed variants), explicit
 // extend instructions (extsb/extsh/extsw and record forms), algebraic
 // right shifts, or a COPY whose source is already known sign-extended.
 // NOTE(review): the "static bool isSignExtended(MachineInstr &, const
 // PPCInstrInfo *)" signature line is missing from this extraction.
 11656  switch(MI.getOpcode()) {
 11657  default:
 11658  return false;
 11659  case PPC::COPY:
 // Delegate to TII for copies: the answer depends on the copied vreg.
 11660  return TII->isSignExtended(MI.getOperand(1).getReg(),
 11661  &MI.getMF()->getRegInfo());
 11662  case PPC::LHA:
 11663  case PPC::LHA8:
 11664  case PPC::LHAU:
 11665  case PPC::LHAU8:
 11666  case PPC::LHAUX:
 11667  case PPC::LHAUX8:
 11668  case PPC::LHAX:
 11669  case PPC::LHAX8:
 11670  case PPC::LWA:
 11671  case PPC::LWAUX:
 11672  case PPC::LWAX:
 11673  case PPC::LWAX_32:
 11674  case PPC::LWA_32:
 11675  case PPC::PLHA:
 11676  case PPC::PLHA8:
 11677  case PPC::PLHA8pc:
 11678  case PPC::PLHApc:
 11679  case PPC::PLWA:
 11680  case PPC::PLWA8:
 11681  case PPC::PLWA8pc:
 11682  case PPC::PLWApc:
 11683  case PPC::EXTSB:
 11684  case PPC::EXTSB8:
 11685  case PPC::EXTSB8_32_64:
 11686  case PPC::EXTSB8_rec:
 11687  case PPC::EXTSB_rec:
 11688  case PPC::EXTSH:
 11689  case PPC::EXTSH8:
 11690  case PPC::EXTSH8_32_64:
 11691  case PPC::EXTSH8_rec:
 11692  case PPC::EXTSH_rec:
 11693  case PPC::EXTSW:
 11694  case PPC::EXTSWSLI:
 11695  case PPC::EXTSWSLI_32_64:
 11696  case PPC::EXTSWSLI_32_64_rec:
 11697  case PPC::EXTSWSLI_rec:
 11698  case PPC::EXTSW_32:
 11699  case PPC::EXTSW_32_64:
 11700  case PPC::EXTSW_32_64_rec:
 11701  case PPC::EXTSW_rec:
 11702  case PPC::SRAW:
 11703  case PPC::SRAWI:
 11704  case PPC::SRAWI_rec:
 11705  case PPC::SRAW_rec:
 11706  return true;
 11707  }
 // Unreachable: every path through the switch above returns.
 11708  return false;
 11709 }
11710 
 // Expand an atomic RMW on a byte or halfword for targets without part-word
 // atomics: run a word-sized lwarx/stwcx. loop on the aligned containing
 // word and use shift/mask bookkeeping so only the addressed sub-word lane
 // is modified. With part-word atomics available, this simply delegates to
 // EmitAtomicBinary with the 1- or 2-byte mnemonics.
 // NOTE(review): the opening signature lines, the call transferring BB's
 // successors to exitMBB after the splice, and the PRED_NE immediate on the
 // final conditional branch are missing from this extraction — confirm
 // against the upstream file.
 11713  bool is8bit, // operation
 11714  unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
 11715  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
 11716  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
 11717 
 11718  // If this is a signed comparison and the value being compared is not known
 11719  // to be sign extended, sign extend it here.
 11720  DebugLoc dl = MI.getDebugLoc();
 11721  MachineFunction *F = BB->getParent();
 11722  MachineRegisterInfo &RegInfo = F->getRegInfo();
 11723  Register incr = MI.getOperand(3).getReg();
 11724  bool IsSignExtended =
 11725  incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
 11726 
 11727  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
 11728  Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
 11729  BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
 11730  .addReg(MI.getOperand(3).getReg());
 // Rewrite the operand so the rest of the expansion sees the extended value.
 11731  MI.getOperand(3).setReg(ValueReg);
 11732  }
 11733  // If we support part-word atomic mnemonics, just use them
 11734  if (Subtarget.hasPartwordAtomics())
 11735  return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
 11736  CmpPred);
 11737 
 11738  // In 64 bit mode we have to use 64 bits for addresses, even though the
 11739  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
 11740  // registers without caring whether they're 32 or 64, but here we're
 11741  // doing actual arithmetic on the addresses.
 11742  bool is64bit = Subtarget.isPPC64();
 11743  bool isLittleEndian = Subtarget.isLittleEndian();
 11744  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
 11745 
 11746  const BasicBlock *LLVM_BB = BB->getBasicBlock();
 11747  MachineFunction::iterator It = ++BB->getIterator();
 11748 
 11749  Register dest = MI.getOperand(0).getReg();
 11750  Register ptrA = MI.getOperand(1).getReg();
 11751  Register ptrB = MI.getOperand(2).getReg();
 11752 
 // loop2MBB only exists for the min/max (compare-and-branch) variant.
 11753  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
 11754  MachineBasicBlock *loop2MBB =
 11755  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
 11756  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
 11757  F->insert(It, loopMBB);
 11758  if (CmpOpcode)
 11759  F->insert(It, loop2MBB);
 11760  F->insert(It, exitMBB);
 11761  exitMBB->splice(exitMBB->begin(), BB,
 11762  std::next(MachineBasicBlock::iterator(MI)), BB->end());
 11764 
 11765  const TargetRegisterClass *RC =
 11766  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
 11767  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
 11768 
 11769  Register PtrReg = RegInfo.createVirtualRegister(RC);
 11770  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
 // Little-endian needs no byte-lane correction, so the raw shift is reused.
 11771  Register ShiftReg =
 11772  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
 11773  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
 11774  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
 11775  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
 11776  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
 11777  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
 11778  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
 11779  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
 11780  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
 11781  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
 11782  Register Ptr1Reg;
 11783  Register TmpReg =
 11784  (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
 11785 
 11786  // thisMBB:
 11787  // ...
 11788  // fallthrough --> loopMBB
 11789  BB->addSuccessor(loopMBB);
 11790 
 11791  // The 4-byte load must be aligned, while a char or short may be
 11792  // anywhere in the word. Hence all this nasty bookkeeping code.
 11793  // add ptr1, ptrA, ptrB [copy if ptrA==0]
 11794  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
 11795  // xori shift, shift1, 24 [16]
 11796  // rlwinm ptr, ptr1, 0, 0, 29
 11797  // slw incr2, incr, shift
 11798  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
 11799  // slw mask, mask2, shift
 11800  // loopMBB:
 11801  // lwarx tmpDest, ptr
 11802  // add tmp, tmpDest, incr2
 11803  // andc tmp2, tmpDest, mask
 11804  // and tmp3, tmp, mask
 11805  // or tmp4, tmp3, tmp2
 11806  // stwcx. tmp4, ptr
 11807  // bne- loopMBB
 11808  // fallthrough --> exitMBB
 11809  // srw SrwDest, tmpDest, shift
 11810  // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
 11811  if (ptrA != ZeroReg) {
 11812  Ptr1Reg = RegInfo.createVirtualRegister(RC);
 11813  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
 11814  .addReg(ptrA)
 11815  .addReg(ptrB);
 11816  } else {
 11817  Ptr1Reg = ptrB;
 11818  }
 11819  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
 11820  // mode.
 11821  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
 11822  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
 11823  .addImm(3)
 11824  .addImm(27)
 11825  .addImm(is8bit ? 28 : 27)
 // (shift = bit offset of the addressed byte/halfword within the word)
 11826  if (!isLittleEndian)
 11827  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
 11828  .addReg(Shift1Reg)
 11829  .addImm(is8bit ? 24 : 16);
 // Clear the low address bits to get the aligned word address.
 11830  if (is64bit)
 11831  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
 11832  .addReg(Ptr1Reg)
 11833  .addImm(0)
 11834  .addImm(61);
 11835  else
 11836  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
 11837  .addReg(Ptr1Reg)
 11838  .addImm(0)
 11839  .addImm(0)
 11840  .addImm(29);
 11841  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
 11842  if (is8bit)
 11843  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
 11844  else {
 11845  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
 11846  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
 11847  .addReg(Mask3Reg)
 11848  .addImm(65535);
 11849  }
 11850  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
 11851  .addReg(Mask2Reg)
 11852  .addReg(ShiftReg);
 11853 
 11854  BB = loopMBB;
 11855  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
 11856  .addReg(ZeroReg)
 11857  .addReg(PtrReg);
 11858  if (BinOpcode)
 11859  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
 11860  .addReg(Incr2Reg)
 11861  .addReg(TmpDestReg);
 11862  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
 11863  .addReg(TmpDestReg)
 11864  .addReg(MaskReg);
 11865  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
 11866  if (CmpOpcode) {
 11867  // For unsigned comparisons, we can directly compare the shifted values.
 11868  // For signed comparisons we shift and sign extend.
 11869  Register SReg = RegInfo.createVirtualRegister(GPRC);
 11870  Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
 11871  BuildMI(BB, dl, TII->get(PPC::AND), SReg)
 11872  .addReg(TmpDestReg)
 11873  .addReg(MaskReg);
 11874  unsigned ValueReg = SReg;
 11875  unsigned CmpReg = Incr2Reg;
 11876  if (CmpOpcode == PPC::CMPW) {
 11877  ValueReg = RegInfo.createVirtualRegister(GPRC);
 11878  BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
 11879  .addReg(SReg)
 11880  .addReg(ShiftReg);
 11881  Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
 11882  BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
 11883  .addReg(ValueReg);
 11884  ValueReg = ValueSReg;
 // Signed path compares against the original (sign-extended) incr,
 // not the shifted Incr2Reg.
 11885  CmpReg = incr;
 11886  }
 11887  BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
 11888  BuildMI(BB, dl, TII->get(PPC::BCC))
 11889  .addImm(CmpPred)
 11890  .addReg(CrReg)
 11891  .addMBB(exitMBB);
 11892  BB->addSuccessor(loop2MBB);
 11893  BB->addSuccessor(exitMBB);
 11894  BB = loop2MBB;
 11895  }
 11896  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
 11897  BuildMI(BB, dl, TII->get(PPC::STWCX))
 11898  .addReg(Tmp4Reg)
 11899  .addReg(ZeroReg)
 11900  .addReg(PtrReg);
 11901  BuildMI(BB, dl, TII->get(PPC::BCC))
 11903  .addReg(PPC::CR0)
 11904  .addMBB(loopMBB);
 11905  BB->addSuccessor(loopMBB);
 11906  BB->addSuccessor(exitMBB);
 11907 
 11908  // exitMBB:
 11909  // ...
 11910  BB = exitMBB;
 11911  // Since the shift amount is not a constant, we need to clear
 11912  // the upper bits with a separate RLWINM.
 // Both builds insert at exitMBB->begin(); the SRW is built second but
 // lands first, so execution order is: srw SrwDest, then rlwinm dest.
 11913  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
 11914  .addReg(SrwDestReg)
 11915  .addImm(0)
 11916  .addImm(is8bit ? 24 : 16)
 11917  .addImm(31);
 11918  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
 11919  .addReg(TmpDestReg)
 11920  .addReg(ShiftReg);
 11921  return BB;
 11922 }
11923 
 // Expand the EH_SjLj_SETJMP pseudo: store the resume address, base pointer
 // (and on 64-bit ELF the TOC pointer) into the buffer, then produce 0 on
 // the direct path and 1 on the longjmp-return path via a PHI in sinkMBB.
 // NOTE(review): several lines are missing from this extraction: the opening
 // signature, the MRI declaration, the lines creating mainMBB/sinkMBB, the
 // successor-transfer after the splice, and the register-mask operand added
 // after BCLalways — confirm against the upstream file.
 11926  MachineBasicBlock *MBB) const {
 11927  DebugLoc DL = MI.getDebugLoc();
 11928  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
 11929  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
 11930 
 11931  MachineFunction *MF = MBB->getParent();
 11933 
 11934  const BasicBlock *BB = MBB->getBasicBlock();
 11936 
 11937  Register DstReg = MI.getOperand(0).getReg();
 11938  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
 11939  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
 11940  Register mainDstReg = MRI.createVirtualRegister(RC);
 11941  Register restoreDstReg = MRI.createVirtualRegister(RC);
 11942 
 11943  MVT PVT = getPointerTy(MF->getDataLayout());
 11944  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
 11945  "Invalid Pointer Size!");
 11946  // For v = setjmp(buf), we generate
 11947  //
 11948  // thisMBB:
 11949  // SjLjSetup mainMBB
 11950  // bl mainMBB
 11951  // v_restore = 1
 11952  // b sinkMBB
 11953  //
 11954  // mainMBB:
 11955  // buf[LabelOffset] = LR
 11956  // v_main = 0
 11957  //
 11958  // sinkMBB:
 11959  // v = phi(main, restore)
 11960  //
 11961 
 11962  MachineBasicBlock *thisMBB = MBB;
 11965  MF->insert(I, mainMBB);
 11966  MF->insert(I, sinkMBB);
 11967 
 11968  MachineInstrBuilder MIB;
 11969 
 11970  // Transfer the remainder of BB and its successor edges to sinkMBB.
 11971  sinkMBB->splice(sinkMBB->begin(), MBB,
 11972  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
 11974 
 11975  // Note that the structure of the jmp_buf used here is not compatible
 11976  // with that used by libc, and is not designed to be. Specifically, it
 11977  // stores only those 'reserved' registers that LLVM does not otherwise
 11978  // understand how to spill. Also, by convention, by the time this
 11979  // intrinsic is called, Clang has already stored the frame address in the
 11980  // first slot of the buffer and stack address in the third. Following the
 11981  // X86 target code, we'll store the jump address in the second slot. We also
 11982  // need to save the TOC pointer (R2) to handle jumps between shared
 11983  // libraries, and that will be stored in the fourth slot. The thread
 11984  // identifier (R13) is not affected.
 11985 
 11986  // thisMBB:
 11987  const int64_t LabelOffset = 1 * PVT.getStoreSize();
 11988  const int64_t TOCOffset = 3 * PVT.getStoreSize();
 11989  const int64_t BPOffset = 4 * PVT.getStoreSize();
 11990 
 11991  // Prepare IP either in reg.
 11992  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
 11993  Register LabelReg = MRI.createVirtualRegister(PtrRC);
 11994  Register BufReg = MI.getOperand(1).getReg();
 11995 
 // 64-bit ELF: save the TOC pointer (X2) into the fourth slot.
 11996  if (Subtarget.is64BitELFABI()) {
 11998  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
 11999  .addReg(PPC::X2)
 12000  .addImm(TOCOffset)
 12001  .addReg(BufReg)
 12002  .cloneMemRefs(MI);
 12003  }
 12004 
 12005  // Naked functions never have a base pointer, and so we use r1. For all
 12006  // other functions, this decision must be delayed until during PEI.
 12007  unsigned BaseReg;
 12008  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
 12009  BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
 12010  else
 12011  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
 12012 
 12013  MIB = BuildMI(*thisMBB, MI, DL,
 12014  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
 12015  .addReg(BaseReg)
 12016  .addImm(BPOffset)
 12017  .addReg(BufReg)
 12018  .cloneMemRefs(MI);
 12019 
 12020  // Setup
 12021  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
 12023 
 // Longjmp-return path yields 1, matching setjmp semantics.
 12024  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
 12025 
 12026  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
 12027  .addMBB(mainMBB);
 12028  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
 12029 
 12030  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
 12031  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
 12032 
 12033  // mainMBB:
 12034  // mainDstReg = 0
 // The BCL above put the resume address into LR; read it back here.
 12035  MIB =
 12036  BuildMI(mainMBB, DL,
 12037  TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
 12038 
 12039  // Store IP
 12040  if (Subtarget.isPPC64()) {
 12041  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
 12042  .addReg(LabelReg)
 12043  .addImm(LabelOffset)
 12044  .addReg(BufReg);
 12045  } else {
 12046  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
 12047  .addReg(LabelReg)
 12048  .addImm(LabelOffset)
 12049  .addReg(BufReg);
 12050  }
 12051  MIB.cloneMemRefs(MI);
 12052 
 12053  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
 12054  mainMBB->addSuccessor(sinkMBB);
 12055 
 12056  // sinkMBB:
 12057  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
 12058  TII->get(PPC::PHI), DstReg)
 12059  .addReg(mainDstReg).addMBB(mainMBB)
 12060  .addReg(restoreDstReg).addMBB(thisMBB);
 12061 
 12062  MI.eraseFromParent();
 12063  return sinkMBB;
 12064 }
12065 
 // Expand the EH_SjLj_LONGJMP pseudo: reload FP, IP, SP, BP (and the TOC on
 // 64-bit SVR4) from the jmp_buf, then branch indirectly to the saved IP
 // via CTR.
 // NOTE(review): missing from this extraction: the opening signature, the
 // MRI declaration, the line declaring the Tmp virtual register used for
 // the reloaded IP, and the setUsesTOCBasePtr call before the TOC reload —
 // confirm against the upstream file.
 12068  MachineBasicBlock *MBB) const {
 12069  DebugLoc DL = MI.getDebugLoc();
 12070  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
 12071 
 12072  MachineFunction *MF = MBB->getParent();
 12074 
 12075  MVT PVT = getPointerTy(MF->getDataLayout());
 12076  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
 12077  "Invalid Pointer Size!");
 12078 
 12079  const TargetRegisterClass *RC =
 12080  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
 12082  // Since FP is only updated here but NOT referenced, it's treated as GPR.
 12083  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
 12084  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
 12085  unsigned BP =
 12086  (PVT == MVT::i64)
 12087  ? PPC::X30
 12088  : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
 12089  : PPC::R30);
 12090 
 12091  MachineInstrBuilder MIB;
 12092 
 // Buffer slot layout mirrors emitEHSjLjSetJmp: slot1=IP, slot2=SP,
 // slot3=TOC, slot4=BP (slot 0 holds the frame address).
 12093  const int64_t LabelOffset = 1 * PVT.getStoreSize();
 12094  const int64_t SPOffset = 2 * PVT.getStoreSize();
 12095  const int64_t TOCOffset = 3 * PVT.getStoreSize();
 12096  const int64_t BPOffset = 4 * PVT.getStoreSize();
 12097 
 12098  Register BufReg = MI.getOperand(0).getReg();
 12099 
 12100  // Reload FP (the jumped-to function may not have had a
 12101  // frame pointer, and if so, then its r31 will be restored
 12102  // as necessary).
 12103  if (PVT == MVT::i64) {
 12104  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
 12105  .addImm(0)
 12106  .addReg(BufReg);
 12107  } else {
 12108  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
 12109  .addImm(0)
 12110  .addReg(BufReg);
 12111  }
 12112  MIB.cloneMemRefs(MI);
 12113 
 12114  // Reload IP
 12115  if (PVT == MVT::i64) {
 12116  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
 12117  .addImm(LabelOffset)
 12118  .addReg(BufReg);
 12119  } else {
 12120  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
 12121  .addImm(LabelOffset)
 12122  .addReg(BufReg);
 12123  }
 12124  MIB.cloneMemRefs(MI);
 12125 
 12126  // Reload SP
 12127  if (PVT == MVT::i64) {
 12128  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
 12129  .addImm(SPOffset)
 12130  .addReg(BufReg);
 12131  } else {
 12132  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
 12133  .addImm(SPOffset)
 12134  .addReg(BufReg);
 12135  }
 12136  MIB.cloneMemRefs(MI);
 12137 
 12138  // Reload BP
 12139  if (PVT == MVT::i64) {
 12140  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
 12141  .addImm(BPOffset)
 12142  .addReg(BufReg);
 12143  } else {
 12144  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
 12145  .addImm(BPOffset)
 12146  .addReg(BufReg);
 12147  }
 12148  MIB.cloneMemRefs(MI);
 12149 
 12150  // Reload TOC
 12151  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
 12153  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
 12154  .addImm(TOCOffset)
 12155  .addReg(BufReg)
 12156  .cloneMemRefs(MI);
 12157  }
 12158 
 12159  // Jump
 12160  BuildMI(*MBB, MI, DL,
 12161  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
 12162  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
 12163 
 12164  MI.eraseFromParent();
 12165  return MBB;
 12166 }
12167 
 // Return true when the function opts into inline stack probing via the
 // "probe-stack"="inline-asm" attribute; all other values (or no attribute)
 // mean no inline probes.
 // NOTE(review): the signature line is missing from this extraction.
 12169  // If the function specifically requests inline stack probes, emit them.
 12170  if (MF.getFunction().hasFnAttribute("probe-stack"))
 12171  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
 12172  "inline-asm";
 12173  return false;
 12174 }
12175 
 // Compute the stack-probe interval for MF: the "stack-probe-size" attribute
 // (default 4096), rounded down to the stack alignment; falls back to the
 // alignment itself if rounding yields zero.
 // NOTE(review): the signature line and the first line of the alignment
 // assert are missing from this extraction.
 12177  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
 12178  unsigned StackAlign = TFI->getStackAlignment();
 12180  "Unexpected stack alignment");
 12181  // The default stack probe size is 4096 if the function has no
 12182  // stack-probe-size attribute.
 12183  const Function &Fn = MF.getFunction();
 12184  unsigned StackProbeSize =
 12185  Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
 // Round down to the stack alignment (mask trick requires StackAlign to be
 // a power of two, which the assert above is presumably checking).
 12186  // Round down to the stack alignment.
 12187  StackProbeSize &= ~(StackAlign - 1);
 12188  return StackProbeSize ? StackProbeSize : StackAlign;
 12189 }
12190 
 12191 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
 12192 // into three phases. In the first phase, it uses the pseudo instruction
 12193 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
 12194 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
 12195 // Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
 12196 // of MaxCallFrameSize so that it can calculate the correct data area pointer.
12199  MachineBasicBlock *MBB) const {
12200  const bool isPPC64 = Subtarget.isPPC64();
12201  MachineFunction *MF = MBB->getParent();
12202  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12203  DebugLoc DL = MI.getDebugLoc();
12204  const unsigned ProbeSize = getStackProbeSize(*MF);
12205  const BasicBlock *ProbedBB = MBB->getBasicBlock();
12207  // The CFG of probing stack looks as
12208  // +-----+
12209  // | MBB |
12210  // +--+--+
12211  // |
12212  // +----v----+
12213  // +--->+ TestMBB +---+
12214  // | +----+----+ |
12215  // | | |
12216  // | +-----v----+ |
12217  // +---+ BlockMBB | |
12218  // +----------+ |
12219  // |
12220  // +---------+ |
12221  // | TailMBB +<--+
12222  // +---------+
12223  // In MBB, calculate previous frame pointer and final stack pointer.
12224  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12225  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12226  // TailMBB is spliced via \p MI.
12227  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12228  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12229  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12230 
12231  MachineFunction::iterator MBBIter = ++MBB->getIterator();
12232  MF->insert(MBBIter, TestMBB);
12233  MF->insert(MBBIter, BlockMBB);
12234  MF->insert(MBBIter, TailMBB);
12235 
12236  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12237  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12238 
12239  Register DstReg = MI.getOperand(0).getReg();
12240  Register NegSizeReg = MI.getOperand(1).getReg();
12241  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12242  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12243  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12244  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12245 
12246  // Since value of NegSizeReg might be realigned in prologepilog, insert a
12247  // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
12248  // NegSize.
12249  unsigned ProbeOpc;
12250  if (!MRI.hasOneNonDBGUse(NegSizeReg))
12251  ProbeOpc =
12252  isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12253  else
12254  // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
12255  // and NegSizeReg will be allocated in the same phyreg to avoid
12256  // redundant copy when NegSizeReg has only one use which is current MI and
12257  // will be replaced by PREPARE_PROBED_ALLOCA then.
12258  ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12259  : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12260  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12261  .addDef(ActualNegSizeReg)
12262  .addReg(NegSizeReg)
12263  .add(MI.getOperand(2))
12264  .add(MI.getOperand(3));
12265 
12266  // Calculate final stack pointer, which equals to SP + ActualNegSize.
12267  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12268  FinalStackPtr)
12269  .addReg(SPReg)
12270  .addReg(ActualNegSizeReg);
12271 
12272  // Materialize a scratch register for update.
12273  int64_t NegProbeSize = -(int64_t)ProbeSize;
12274  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12275  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12276  if (!isInt<16>(NegProbeSize)) {
12277  Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12278  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12279  .addImm(NegProbeSize >> 16);
12280  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12281  ScratchReg)
12282  .addReg(TempReg)
12283  .addImm(NegProbeSize & 0xFFFF);
12284  } else
12285  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12286  .addImm(NegProbeSize);
12287 
12288  {
12289  // Probing leading residual part.
12290  Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12291  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12292  .addReg(ActualNegSizeReg)
12293  .addReg(ScratchReg);
12294  Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12295  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12296  .addReg(Div)
12297  .addReg(ScratchReg);
12298  Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12299  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12300  .addReg(Mul)
12301  .addReg(ActualNegSizeReg);
12302  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12303  .addReg(FramePointer)
12304  .addReg(SPReg)
12305  .addReg(NegMod);
12306  }
12307 
12308  {
12309  // Remaining part should be multiple of ProbeSize.
12310  Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12311  BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12312  .addReg(SPReg)
12313  .addReg(FinalStackPtr);
12314  BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12316  .addReg(CmpResult)
12317  .addMBB(TailMBB);
12318  TestMBB->addSuccessor(BlockMBB);
12319  TestMBB->addSuccessor(TailMBB);
12320  }
12321 
12322  {
12323  // Touch the block.
12324  // |P...|P...|P...
12325  BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12326  .addReg(FramePointer)
12327  .addReg(SPReg)
12328  .addReg(ScratchReg);
12329  BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12330  BlockMBB->addSuccessor(TestMBB);
12331  }
12332 
12333  // Calculation of MaxCallFrameSize is deferred to prologepilog, use
12334  // DYNAREAOFFSET pseudo instruction to get the future result.
12335  Register MaxCallFrameSizeReg =
12336  MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12337  BuildMI(TailMBB, DL,
12338  TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12339  MaxCallFrameSizeReg)
12340  .add(MI.getOperand(2))
12341  .add(MI.getOperand(3));
12342  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12343  .addReg(SPReg)
12344  .addReg(MaxCallFrameSizeReg);
12345 
12346  // Splice instructions after MI to TailMBB.
12347  TailMBB->splice(TailMBB->end(), MBB,
12348  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12350  MBB->addSuccessor(TestMBB);
12351 
12352  // Delete the pseudo instruction.
12353  MI.eraseFromParent();
12354 
12355  ++NumDynamicAllocaProbed;
12356  return TailMBB;
12357 }
12358 
12361  MachineBasicBlock *BB) const {
12362  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12363  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12364  if (Subtarget.is64BitELFABI() &&
12365  MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12366  !Subtarget.isUsingPCRelativeCalls()) {
12367  // Call lowering should have added an r2 operand to indicate a dependence
12368  // on the TOC base pointer value. It can't however, because there is no
12369  // way to mark the dependence as implicit there, and so the stackmap code
12370  // will confuse it with a regular operand. Instead, add the dependence
12371  // here.
12372  MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12373  }
12374 
12375  return emitPatchPoint(MI, BB);
12376  }
12377 
12378  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12379  MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12380  return emitEHSjLjSetJmp(MI, BB);
12381  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12382  MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12383  return emitEHSjLjLongJmp(MI, BB);
12384  }
12385 
12386  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12387 
12388  // To "insert" these instructions we actually have to insert their
12389  // control-flow patterns.
12390  const BasicBlock *LLVM_BB = BB->getBasicBlock();
12391  MachineFunction::iterator It = ++BB->getIterator();
12392 
12393  MachineFunction *F = BB->getParent();
12394  MachineRegisterInfo &MRI = F->getRegInfo();
12395 
12396  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12397  MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12398  MI.getOpcode() == PPC::SELECT_I8) {
12400  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12401  MI.getOpcode() == PPC::SELECT_CC_I8)
12402  Cond.push_back(MI.getOperand(4));
12403  else
12405  Cond.push_back(MI.getOperand(1));
12406 
12407  DebugLoc dl = MI.getDebugLoc();
12408  TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12409  MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12410  } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12411  MI.getOpcode() == PPC::SELECT_CC_F8 ||
12412  MI.getOpcode() == PPC::SELECT_CC_F16 ||
12413  MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12414  MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12415  MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12416  MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12417  MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12418  MI.getOpcode() == PPC::SELECT_CC_SPE ||
12419  MI.getOpcode() == PPC::SELECT_F4 ||
12420  MI.getOpcode() == PPC::SELECT_F8 ||
12421  MI.getOpcode() == PPC::SELECT_F16 ||
12422  MI.getOpcode() == PPC::SELECT_SPE ||
12423  MI.getOpcode() == PPC::SELECT_SPE4 ||
12424  MI.getOpcode() == PPC::SELECT_VRRC ||
12425  MI.getOpcode() == PPC::SELECT_VSFRC ||
12426  MI.getOpcode() == PPC::SELECT_VSSRC ||
12427  MI.getOpcode() == PPC::SELECT_VSRC) {
12428  // The incoming instruction knows the destination vreg to set, the
12429  // condition code register to branch on, the true/false values to
12430  // select between, and a branch opcode to use.
12431 
12432  // thisMBB:
12433  // ...
12434  // TrueVal = ...
12435  // cmpTY ccX, r1, r2
12436  // bCC copy1MBB
12437  // fallthrough --> copy0MBB
12438  MachineBasicBlock *thisMBB = BB;
12439  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12440  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12441  DebugLoc dl = MI.getDebugLoc();
12442  F->insert(It, copy0MBB);
12443  F->insert(It, sinkMBB);
12444 
12445  // Transfer the remainder of BB and its successor edges to sinkMBB.
12446  sinkMBB->splice(sinkMBB->begin(), BB,
12447  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12449 
12450  // Next, add the true and fallthrough blocks as its successors.
12451  BB->addSuccessor(copy0MBB);
12452  BB->addSuccessor(sinkMBB);
12453 
12454  if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12455  MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12456  MI.getOpcode() == PPC::SELECT_F16 ||
12457  MI.getOpcode() == PPC::SELECT_SPE4 ||
12458  MI.getOpcode() == PPC::SELECT_SPE ||
12459  MI.getOpcode() == PPC::SELECT_VRRC ||
12460  MI.getOpcode() == PPC::SELECT_VSFRC ||
12461  MI.getOpcode() == PPC::SELECT_VSSRC ||
12462  MI.getOpcode() == PPC::SELECT_VSRC) {
12463  BuildMI(BB, dl, TII->get(PPC::BC))
12464  .addReg(MI.getOperand(1).getReg())
12465  .addMBB(sinkMBB);
12466  } else {
12467  unsigned SelectPred = MI.getOperand(4).getImm();
12468  BuildMI(BB, dl, TII->get(PPC::BCC))
12469  .addImm(SelectPred)
12470  .addReg(MI.getOperand(1).getReg())
12471  .addMBB(sinkMBB);
12472  }
12473 
12474  // copy0MBB:
12475  // %FalseValue = ...
12476  // # fallthrough to sinkMBB
12477  BB = copy0MBB;
12478 
12479  // Update machine-CFG edges
12480  BB->addSuccessor(sinkMBB);
12481 
12482  // sinkMBB:
12483  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12484  // ...
12485  BB = sinkMBB;
12486  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12487  .addReg(MI.getOperand(3).getReg())
12488  .addMBB(copy0MBB)
12489  .addReg(MI.getOperand(2).getReg())
12490  .addMBB(thisMBB);
12491  } else if (MI.getOpcode() == PPC::ReadTB) {
12492  // To read the 64-bit time-base register on a 32-bit target, we read the
12493  // two halves. Should the counter have wrapped while it was being read, we
12494  // need to try again.
12495  // ...
12496  // readLoop:
12497  // mfspr Rx,TBU # load from TBU
12498  // mfspr Ry,TB # load from TB
12499  // mfspr Rz,TBU # load from TBU
12500  // cmpw crX,Rx,Rz # check if 'old'='new'
12501  // bne readLoop # branch if they're not equal
12502  // ...
12503 
12504  MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12505  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12506  DebugLoc dl = MI.getDebugLoc();
12507  F->insert(It, readMBB);
12508  F->insert(It, sinkMBB);
12509 
12510  // Transfer the remainder of BB and its successor edges to sinkMBB.
12511  sinkMBB->splice(sinkMBB->begin(), BB,
12512  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12514 
12515  BB->addSuccessor(readMBB);
12516  BB = readMBB;
12517 
12518  MachineRegisterInfo &RegInfo = F->getRegInfo();
12519  Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12520  Register LoReg = MI.getOperand(0).getReg();
12521  Register HiReg = MI.getOperand(1).getReg();
12522 
12523  BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12524  BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12525  BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12526 
12527  Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12528 
12529  BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12530  .addReg(HiReg)
12531  .addReg(ReadAgainReg);
12532  BuildMI(BB, dl, TII->get(PPC::BCC))
12534  .addReg(CmpReg)
12535  .addMBB(readMBB);
12536 
12537  BB->addSuccessor(readMBB);
12538  BB->addSuccessor(sinkMBB);
12539  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12540  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12541  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12542  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12543  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12544  BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12545  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12546  BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12547 
12548  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12550  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12552  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12553  BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12554  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12555  BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12556 
12557  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12559  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12560  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12561  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12562  BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12563  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12564  BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12565 
12566  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12568  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12570  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12571  BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12572  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12573  BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12574 
12575  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12576  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12577  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12578  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12579  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12580  BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12581  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12582  BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12583 
12584  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12585  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12586  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12587  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12588  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12589  BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12590  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12591  BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12592 
12593  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12594  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
12595  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12596  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
12597  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12598  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
12599  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12600  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
12601 
12602  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12603  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
12604  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12605  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
12606  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12607  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
12608  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12609  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
12610 
12611  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12612  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
12613  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12614  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
12615  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12616  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
12617  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12618  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
12619 
12620  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12621  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
12622  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12623  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
12624  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12625  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
12626  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12627  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
12628 
12629  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12630  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12631  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12632  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12633  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12634  BB = EmitAtomicBinary(MI, BB, 4, 0);
12635  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12636  BB = EmitAtomicBinary(MI, BB, 8, 0);
12637  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12638  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12639  (Subtarget.hasPartwordAtomics() &&
12640  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12641  (Subtarget.hasPartwordAtomics() &&
12642  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12643  bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12644 
12645  auto LoadMnemonic = PPC::LDARX;
12646  auto StoreMnemonic = PPC::STDCX;
12647  switch (MI.getOpcode()) {
12648  default:
12649  llvm_unreachable("Compare and swap of unknown size");
12650  case PPC::ATOMIC_CMP_SWAP_I8:
12651  LoadMnemonic = PPC::LBARX;
12652  StoreMnemonic = PPC::STBCX;
12653  assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12654  break;
12655  case PPC::ATOMIC_CMP_SWAP_I16:
12656  LoadMnemonic = PPC::LHARX;
12657  StoreMnemonic = PPC::STHCX;
12658  assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12659  break;
12660  case PPC::ATOMIC_CMP_SWAP_I32:
12661  LoadMnemonic = PPC::LWARX;
12662  StoreMnemonic = PPC::STWCX;
12663  break;
12664  case PPC::ATOMIC_CMP_SWAP_I64:
12665  LoadMnemonic = PPC::LDARX;
12666  StoreMnemonic = PPC::STDCX;
12667  break;
12668  }
12669  MachineRegisterInfo &RegInfo = F->getRegInfo();
12670  Register dest = MI.getOperand(0).getReg();
12671  Register ptrA = MI.getOperand(1).getReg();
12672  Register ptrB = MI.getOperand(2).getReg();
12673  Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12674  Register oldval = MI.getOperand(3).getReg();
12675  Register newval = MI.getOperand(4).getReg();
12676  DebugLoc dl = MI.getDebugLoc();
12677 
12678  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12679  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12680  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12681  F->insert(It, loop1MBB);
12682  F->insert(It, loop2MBB);
12683  F->insert(It, exitMBB);
12684  exitMBB->splice(exitMBB->begin(), BB,
12685  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12687 
12688  // thisMBB:
12689  // ...
12690  // fallthrough --> loopMBB
12691  BB->addSuccessor(loop1MBB);
12692 
12693  // loop1MBB:
12694  // l[bhwd]arx dest, ptr
12695  // cmp[wd] dest, oldval
12696  // bne- exitBB
12697  // loop2MBB:
12698  // st[bhwd]cx. newval, ptr
12699  // bne- loopMBB
12700  // b exitBB
12701  // exitBB:
12702  BB = loop1MBB;
12703  BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12704  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
12705  .addReg(dest)
12706  .addReg(oldval);
12707  BuildMI(BB, dl, TII->get(PPC::BCC))
12709  .addReg(CrReg)
12710  .addMBB(exitMBB);
12711  BB->addSuccessor(loop2MBB);
12712  BB->addSuccessor(exitMBB);
12713 
12714  BB = loop2MBB;
12715  BuildMI(BB, dl, TII->get(StoreMnemonic))
12716  .addReg(newval)
12717  .addReg(ptrA)
12718  .addReg(ptrB);
12719  BuildMI(BB, dl, TII->get(PPC::BCC))
12721  .addReg(PPC::CR0)
12722  .addMBB(loop1MBB);
12723  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12724  BB->addSuccessor(loop1MBB);
12725  BB->addSuccessor(exitMBB);
12726 
12727  // exitMBB:
12728  // ...
12729  BB = exitMBB;
12730  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12731  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12732  // We must use 64-bit registers for addresses when targeting 64-bit,
12733  // since we're actually doing arithmetic on them. Other registers
12734  // can be 32-bit.
12735  bool is64bit = Subtarget.isPPC64();
12736  bool isLittleEndian = Subtarget.isLittleEndian();
12737  bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12738 
12739  Register dest = MI.getOperand(0).getReg();
12740  Register ptrA = MI.getOperand(1).getReg();
12741  Register ptrB = MI.getOperand(2).getReg();
12742  Register oldval = MI.getOperand(3).getReg();
12743  Register newval = MI.getOperand(4).getReg();
12744  DebugLoc dl = MI.getDebugLoc();
12745 
12746  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12747  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12748  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12749  F->insert(It, loop1MBB);
12750  F->insert(It, loop2MBB);
12751  F->insert(It, exitMBB);
12752  exitMBB->splice(exitMBB->begin(), BB,
12753  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12755 
12756  MachineRegisterInfo &RegInfo = F->getRegInfo();
12757  const TargetRegisterClass *RC =
12758  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12759  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12760 
12761  Register PtrReg = RegInfo.createVirtualRegister(RC);
12762  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12763  Register ShiftReg =
12764  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12765  Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12766  Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12767  Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12768  Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12769  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12770  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12771  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12772  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12773  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12774  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12775  Register Ptr1Reg;
12776  Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12777  Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12778  Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12779  // thisMBB:
12780  // ...
12781  // fallthrough --> loopMBB
12782  BB->addSuccessor(loop1MBB);
12783 
12784  // The 4-byte load must be aligned, while a char or short may be
12785  // anywhere in the word. Hence all this nasty bookkeeping code.
12786  // add ptr1, ptrA, ptrB [copy if ptrA==0]
12787  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12788  // xori shift, shift1, 24 [16]
12789  // rlwinm ptr, ptr1, 0, 0, 29
12790  // slw newval2, newval, shift
12791  // slw oldval2, oldval,shift
12792  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12793  // slw mask, mask2, shift
12794  // and newval3, newval2, mask
12795  // and oldval3, oldval2, mask
12796  // loop1MBB:
12797  // lwarx tmpDest, ptr
12798  // and tmp, tmpDest, mask
12799  // cmpw tmp, oldval3
12800  // bne- exitBB
12801  // loop2MBB:
12802  // andc tmp2, tmpDest, mask
12803  // or tmp4, tmp2, newval3
12804  // stwcx. tmp4, ptr
12805  // bne- loop1MBB
12806  // b exitBB
12807  // exitBB:
12808  // srw dest, tmpDest, shift
12809  if (ptrA != ZeroReg) {
12810  Ptr1Reg = RegInfo.createVirtualRegister(RC);
12811  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12812  .addReg(ptrA)
12813  .addReg(ptrB);
12814  } else {
12815  Ptr1Reg = ptrB;
12816  }
12817 
12818  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12819  // mode.
12820  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12821  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12822  .addImm(3)
12823  .addImm(27)
12824  .addImm(is8bit ? 28 : 27);
12825  if (!isLittleEndian)
12826  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12827  .addReg(Shift1Reg)
12828  .addImm(is8bit ? 24 : 16);
12829  if (is64bit)
12830  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12831  .addReg(Ptr1Reg)
12832  .addImm(0)
12833  .addImm(61);
12834  else
12835  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12836  .addReg(Ptr1Reg)
12837  .addImm(0)
12838  .addImm(0)
12839  .addImm(29);
12840  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12841  .addReg(newval)
12842  .addReg(ShiftReg);
12843  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12844  .addReg(oldval)
12845  .addReg(ShiftReg);
12846  if (is8bit)
12847  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12848  else {
12849  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12850  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12851  .addReg(Mask3Reg)
12852  .addImm(65535);
12853  }
12854  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12855  .addReg(Mask2Reg)
12856  .addReg(ShiftReg);
12857  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12858  .addReg(NewVal2Reg)
12859  .addReg(MaskReg);
12860  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12861  .addReg(OldVal2Reg)
12862  .addReg(MaskReg);
12863 
12864  BB = loop1MBB;
12865  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12866  .addReg(ZeroReg)
12867  .addReg(PtrReg);
12868  BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12869  .addReg(TmpDestReg)
12870  .addReg(MaskReg);
12871  BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
12872  .addReg(TmpReg)
12873  .addReg(OldVal3Reg);
12874  BuildMI(BB, dl, TII->get(PPC::BCC))
12876  .addReg(CrReg)
12877  .addMBB(exitMBB);
12878  BB->addSuccessor(loop2MBB);
12879  BB->addSuccessor(exitMBB);
12880 
12881  BB = loop2MBB;
12882  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12883  .addReg(TmpDestReg)
12884  .addReg(MaskReg);
12885  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12886  .addReg(Tmp2Reg)
12887  .addReg(NewVal3Reg);
12888  BuildMI(BB, dl, TII->get(PPC::STWCX))
12889  .addReg(Tmp4Reg)
12890  .addReg(ZeroReg)
12891  .addReg(PtrReg);
12892  BuildMI(BB, dl, TII->get(PPC::BCC))
12894  .addReg(PPC::CR0)
12895  .addMBB(loop1MBB);
12896  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12897  BB->addSuccessor(loop1MBB);
12898  BB->addSuccessor(exitMBB);
12899 
12900  // exitMBB:
12901  // ...
12902  BB = exitMBB;
12903  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12904  .addReg(TmpReg)
12905  .addReg(ShiftReg);
12906  } else if (MI.getOpcode() == PPC::FADDrtz) {
12907  // This pseudo performs an FADD with rounding mode temporarily forced
12908  // to round-to-zero. We emit this via custom inserter since the FPSCR
12909  // is not modeled at the SelectionDAG level.
12910  Register Dest = MI.getOperand(0).getReg();
12911  Register Src1 = MI.getOperand(1).getReg();
12912  Register Src2 = MI.getOperand(2).getReg();
12913  DebugLoc dl = MI.getDebugLoc();
12914 
12915  MachineRegisterInfo &RegInfo = F->getRegInfo();
12916  Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12917 
12918  // Save FPSCR value.
12919  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12920 
12921  // Set rounding mode to round-to-zero.
12922  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12923  .addImm(31)
12925 
12926  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12927  .addImm(30)
12929 
12930  // Perform addition.
12931  auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12932  .addReg(Src1)
12933  .addReg(Src2);
12934  if (MI.getFlag(MachineInstr::NoFPExcept))
12936 
12937  // Restore FPSCR value.
12938  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12939  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12940  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12941  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12942  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12943  unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12944  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12945  ? PPC::ANDI8_rec
12946  : PPC::ANDI_rec;
12947  bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12948  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12949 
12950  MachineRegisterInfo &RegInfo = F->getRegInfo();
12951  Register Dest = RegInfo.createVirtualRegister(
12952  Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12953 
12954  DebugLoc Dl = MI.getDebugLoc();
12955  BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12956  .addReg(MI.getOperand(1).getReg())
12957  .addImm(1);
12958  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12959  MI.getOperand(0).getReg())
12960  .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12961  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12962  DebugLoc Dl = MI.getDebugLoc();
12963  MachineRegisterInfo &RegInfo = F->getRegInfo();
12964  Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12965  BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12966  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12967  MI.getOperand(0).getReg())
12968  .addReg(CRReg);
12969  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12970  DebugLoc Dl = MI.getDebugLoc();
12971  unsigned Imm = MI.getOperand(1).getImm();
12972  BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12973  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12974  MI.getOperand(0).getReg())
12975  .addReg(PPC::CR0EQ);
12976  } else if (MI.getOpcode() == PPC::SETRNDi) {
12977  DebugLoc dl = MI.getDebugLoc();
12978  Register OldFPSCRReg = MI.getOperand(0).getReg();
12979 
12980  // Save FPSCR value.
12981  if (MRI.use_empty(OldFPSCRReg))
12982  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12983  else
12984  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12985 
12986  // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12987  // the following settings:
12988  // 00 Round to nearest
12989  // 01 Round to 0
12990  // 10 Round to +inf
12991  // 11 Round to -inf
12992 
12993  // When the operand is immediate, using the two least significant bits of
12994  // the immediate to set the bits 62:63 of FPSCR.
12995  unsigned Mode = MI.getOperand(1).getImm();
12996  BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12997  .addImm(31)
12999 
13000  BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13001  .addImm(30)
13003  } else if (MI.getOpcode() == PPC::SETRND) {
13004  DebugLoc dl = MI.getDebugLoc();
13005 
13006  // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13007  // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13008  // If the target doesn't have DirectMove, we should use stack to do the
13009  // conversion, because the target doesn't have the instructions like mtvsrd
13010  // or mfvsrd to do this conversion directly.
13011  auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13012  if (Subtarget.hasDirectMove()) {
13013  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13014  .addReg(SrcReg);
13015  } else {
13016  // Use stack to do the register copy.
13017  unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13018  MachineRegisterInfo &RegInfo = F->getRegInfo();
13019  const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13020  if (RC == &PPC::F8RCRegClass) {
13021  // Copy register from F8RCRegClass to G8RCRegclass.
13022  assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13023  "Unsupported RegClass.");
13024 
13025  StoreOp = PPC::STFD;
13026  LoadOp = PPC::LD;
13027  } else {
13028  // Copy register from G8RCRegClass to F8RCRegclass.
13029  assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13030  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13031  "Unsupported RegClass.");
13032  }
13033 
13034  MachineFrameInfo &MFI = F->getFrameInfo();
13035  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13036 
13037  MachineMemOperand *MMOStore = F->getMachineMemOperand(
13038  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13040  MFI.getObjectAlign(FrameIdx));
13041 
13042  // Store the SrcReg into the stack.
13043  BuildMI(*BB, MI, dl, TII->get(StoreOp))
13044  .addReg(SrcReg)
13045  .addImm(0)
13046  .addFrameIndex(FrameIdx)
13047  .addMemOperand(MMOStore);
13048 
13049  MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13050  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13052  MFI.getObjectAlign(FrameIdx));
13053 
13054  // Load from the stack where SrcReg is stored, and save to DestReg,
13055  // so we have done the RegClass conversion from RegClass::SrcReg to
13056  // RegClass::DestReg.
13057  BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13058  .addImm(0)
13059  .addFrameIndex(FrameIdx)
13060  .addMemOperand(MMOLoad);
13061  }
13062  };
13063 
13064  Register OldFPSCRReg = MI.getOperand(0).getReg();
13065 
13066  // Save FPSCR value.
13067  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13068 
13069  // When the operand is gprc register, use two least significant bits of the
13070  // register and mtfsf instruction to set the bits 62:63 of FPSCR.
13071  //
13072  // copy OldFPSCRTmpReg, OldFPSCRReg
13073  // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13074  // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13075  // copy NewFPSCRReg, NewFPSCRTmpReg
13076  // mtfsf 255, NewFPSCRReg
13077  MachineOperand SrcOp = MI.getOperand(1);
13078  MachineRegisterInfo &RegInfo = F->getRegInfo();
13079  Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13080 
13081  copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13082 
13083  Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13084  Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13085 
13086  // The first operand of INSERT_SUBREG should be a register which has
13087  // subregisters, we only care about its RegClass, so we should use an
13088  // IMPLICIT_DEF register.
13089  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13090  BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13091  .addReg(ImDefReg)
13092  .add(SrcOp)
13093  .addImm(1);
13094 
13095  Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13096  BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13097  .addReg(OldFPSCRTmpReg)
13098  .addReg(ExtSrcReg)
13099  .addImm(0)
13100  .addImm(62);
13101 
13102  Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13103  copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13104 
13105  // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
13106  // bits of FPSCR.
13107  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13108  .addImm(255)
13109  .addReg(NewFPSCRReg)
13110  .addImm(0)
13111  .addImm(0);
13112  } else if (MI.getOpcode() == PPC::SETFLM) {
13113  DebugLoc Dl = MI.getDebugLoc();
13114 
13115  // Result of setflm is previous FPSCR content, so we need to save it first.
13116  Register OldFPSCRReg = MI.getOperand(0).getReg();
13117  if (MRI.use_empty(OldFPSCRReg))
13118  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13119  else
13120  BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13121 
13122  // Put bits in 32:63 to FPSCR.
13123  Register NewFPSCRReg = MI.getOperand(1).getReg();
13124  BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13125  .addImm(255)
13126  .addReg(NewFPSCRReg)
13127  .addImm(0)
13128  .addImm(0);
13129  } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13130  MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13131  return emitProbedAlloca(MI, BB);
13132  } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13133  DebugLoc DL = MI.getDebugLoc();
13134  Register Src = MI.getOperand(2).getReg();
13135  Register Lo = MI.getOperand(0).getReg();
13136  Register Hi = MI.getOperand(1).getReg();
13137  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13138  .addDef(Lo)
13139  .addUse(Src, 0, PPC::sub_gp8_x1);
13140  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13141  .addDef(Hi)
13142  .addUse(Src, 0, PPC::sub_gp8_x0);
13143  } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13144  MI.getOpcode() == PPC::STQX_PSEUDO) {
13145  DebugLoc DL = MI.getDebugLoc();
13146  // Ptr is used as the ptr_rc_no_r0 part
13147  // of LQ/STQ's memory operand and adding result of RA and RB,
13148  // so it has to be g8rc_and_g8rc_nox0.
13149  Register Ptr =
13150  F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13151  Register Val = MI.getOperand(0).getReg();
13152  Register RA = MI.getOperand(1).getReg();
13153  Register RB = MI.getOperand(2).getReg();
13154  BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13155  BuildMI(*BB, MI, DL,
13156  MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13157  : TII->get(PPC::STQ))
13158  .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13159  .addImm(0)
13160  .addReg(Ptr);
13161  } else {
13162  llvm_unreachable("Unexpected instr type to insert");
13163  }
13164 
13165  MI.eraseFromParent(); // The pseudo instruction is gone now.
13166  return BB;
13167 }
13168 
13169 //===----------------------------------------------------------------------===//
13170 // Target Optimization Hooks
13171 //===----------------------------------------------------------------------===//
13172 
13173 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13174  // For the estimates, convergence is quadratic, so we essentially double the
13175  // number of digits correct after every iteration. For both FRE and FRSQRTE,
13176  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13177  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13178  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13179  if (VT.getScalarType() == MVT::f64)
13180  RefinementSteps++;
13181  return RefinementSteps;
13182 }
13183 
13184 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13185  const DenormalMode &Mode) const {
13186  // We only have VSX Vector Test for software Square Root.
13187  EVT VT = Op.getValueType();
13188  if (!isTypeLegal(MVT::i1) ||
13189  (VT != MVT::f64 &&
13190  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13192 
13193  SDLoc DL(Op);
13194  // The output register of FTSQRT is CR field.
13196  // ftsqrt BF,FRB
13197  // Let e_b be the unbiased exponent of the double-precision
13198  // floating-point operand in register FRB.
13199  // fe_flag is set to 1 if either of the following conditions occurs.
13200  // - The double-precision floating-point operand in register FRB is a zero,
13201  // a NaN, or an infinity, or a negative value.
13202  // - e_b is less than or equal to -970.
13203  // Otherwise fe_flag is set to 0.
13204  // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13205  // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13206  // exponent is less than -970)
13207  SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13208  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13209  FTSQRT, SRIdxVal),
13210  0);
13211 }
13212 
13213 SDValue
13214 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13215  SelectionDAG &DAG) const {
13216  // We only have VSX Vector Square Root.
13217  EVT VT = Op.getValueType();
13218  if (VT != MVT::f64 &&
13219  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13221 
13222  return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13223 }
13224 
13225 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13226  int Enabled, int &RefinementSteps,
13227  bool &UseOneConstNR,
13228  bool Reciprocal) const {
13229  EVT VT = Operand.getValueType();
13230  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13231  (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13232  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13233  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13234  if (RefinementSteps == ReciprocalEstimate::Unspecified)
13235  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13236 
13237  // The Newton-Raphson computation with a single constant does not provide
13238  // enough accuracy on some CPUs.
13239  UseOneConstNR = !Subtarget.needsTwoConstNR();
13240  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13241  }
13242  return SDValue();
13243 }
13244 
13245 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13246  int Enabled,
13247  int &RefinementSteps) const {
13248  EVT VT = Operand.getValueType();
13249  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13250  (VT == MVT::f64 && Subtarget.hasFRE()) ||
13251  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13252  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13253  if (RefinementSteps == ReciprocalEstimate::Unspecified)
13254  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13255  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13256  }
13257  return SDValue();
13258 }
13259 
13260 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13261  // Note: This functionality is used only when unsafe-fp-math is enabled, and
13262  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13263  // enabled for division), this functionality is redundant with the default
13264  // combiner logic (once the division -> reciprocal/multiply transformation
13265  // has taken place). As a result, this matters more for older cores than for
13266  // newer ones.
13267 
13268  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13269  // reciprocal if there are two or more FDIVs (for embedded cores with only
13270  // one FP pipeline) for three or more FDIVs (for generic OOO cores).
13271  switch (Subtarget.getCPUDirective()) {
13272  default:
13273  return 3;
13274  case PPC::DIR_440:
13275  case PPC::DIR_A2:
13276  case PPC::DIR_E500:
13277  case PPC::DIR_E500mc:
13278  case PPC::DIR_E5500:
13279  return 2;
13280  }
13281 }
13282 
13283 // isConsecutiveLSLoc needs to work even if all adds have not yet been
13284 // collapsed, and so we need to look through chains of them.
13286  int64_t& Offset, SelectionDAG &DAG) {
13287  if (DAG.isBaseWithConstantOffset(Loc)) {
13288  Base = Loc.getOperand(0);
13289  Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13290 
13291  // The base might itself be a base plus an offset, and if so, accumulate
13292  // that as well.
13294  }
13295 }
13296 
// Return true if the memory location Loc is exactly Dist accesses of Bytes
// bytes away from the base pointer of the load/store Base (Dist may be
// negative). Three strategies are tried in order: frame-index offsets,
// (base + constant-offset) decomposition, and global-address + offset.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  // The access being checked must itself be exactly Bytes wide.
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // Frame indices can only be compared against other frame indices.
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    // Both stack objects must be exactly Bytes in size, then compare their
    // static frame offsets directly.
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  // Strip (possibly nested) base+constant additions from both addresses and
  // compare base pointers plus accumulated offsets.
  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  // Finally, try matching both addresses as global-value + offset.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}
13334 
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
//
// N may be an ordinary load/store node or an Altivec/VSX load/store
// intrinsic; in the intrinsic cases the memory type is derived from the
// intrinsic ID and the pointer operand position differs (operand 2 for
// loads, operand 3 for stores).
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  // Plain loads/stores: compare their base pointer directly.
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  // Vector load intrinsics: map the intrinsic ID to the memory type it
  // accesses, then check the pointer (operand 2).
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  // Vector store intrinsics: same mapping, but the pointer is operand 3
  // (operand 2 is the value being stored).
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  // Anything else is not a memory access we understand.
  return false;
}
13408 
13409 // Return true is there is a nearyby consecutive load to the one provided
13410 // (regardless of alignment). We search up and down the chain, looking though
13411 // token factors and other loads (but nothing else). As a result, a true result
13412 // indicates that it is safe to create a new consecutive load adjacent to the
13413 // load provided.
13415  SDValue Chain = LD->getChain();
13416  EVT VT = LD->getMemoryVT();
13417 
13418  SmallSet<SDNode *, 16> LoadRoots;
13419  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13420  SmallSet<SDNode *, 16> Visited;
13421 
13422  // First, search up the chain, branching to follow all token-factor operands.
13423  // If we find a consecutive load, then we're done, otherwise, record all
13424  // nodes just above the top-level loads and token factors.
13425  while (!Queue.empty()) {
13426  SDNode *ChainNext = Queue.pop_back_val();
13427  if (!Visited.insert(ChainNext).second)
13428  continue;
13429 
13430  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13431  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13432  return true;
13433 
13434  if (!Visited.count(ChainLD->getChain().getNode()))
13435  Queue.push_back(ChainLD->getChain().getNode());
13436  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13437  for (const SDUse &O : ChainNext->ops())
13438  if (!Visited.count(O.getNode()))
13439  Queue.push_back(O.getNode());
13440  } else
13441  LoadRoots.insert(ChainNext);
13442  }
13443 
13444  // Second, search down the chain, starting from the top-level nodes recorded
13445  // in the first phase. These top-level nodes are the nodes just above all
13446  // loads and token factors. Starting with their uses, recursively look though
13447  // all loads (just the chain uses) and token factors to find a consecutive
13448  // load.
13449  Visited.clear();
13450  Queue.clear();
13451 
13452  for (SDNode *I : LoadRoots) {
13453  Queue.push_back(I);
13454 
13455  while (!Queue.empty()) {
13456  SDNode *LoadRoot = Queue.pop_back_val();
13457  if (!Visited.insert(LoadRoot).second)
13458  continue;
13459 
13460  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13461  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13462  return true;
13463 
13464  for (SDNode *U : LoadRoot->uses())
13465  if (((isa<MemSDNode>(U) &&
13466  cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13467  U->getOpcode() == ISD::TokenFactor) &&
13468  !Visited.count(U))
13469  Queue.push_back(U);
13470  }
13471  }
13472 
13473  return false;
13474 }
13475 
/// This function is called when we have proved that a SETCC node can be replaced
/// by subtraction (and other supporting instructions) so that the result of
/// comparison is kept in a GPR instead of CR. This function is purely for
/// codegen purposes and has some flags to guide the codegen process.
///
/// \param N          The SETCC node being replaced.
/// \param Size       The width (in bits) of the largest legal integer type;
///                   both operands are strictly narrower than this.
/// \param Complement XOR the shifted result with 1 (for ULE/UGE, which are
///                   the negations of UGT/ULT).
/// \param Swap       Swap the two operands before subtracting (for UGT/ULE).
/// \return           An i1 value holding the comparison result.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  // NOTE(review): ZERO_EXTEND is normally unary; the extra constant operand
  // passed here looks vestigial — confirm against upstream before touching.
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
                         DAG.getConstant(Size, DL, MVT::i32));
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
                         DAG.getConstant(Size, DL, MVT::i32));

  // Swap if needed. Depends on the condition code.
  if (Swap)
    std::swap(Op0, Op1);

  // Subtract extended integers.
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of original comparison.
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
                             DAG.getConstant(Size - 1, DL, MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed. Based on the condition code.
  if (Complement)
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
                        DAG.getConstant(1, DL, MVT::i64));

  // Narrow back down to the i1 the SETCC users expect.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}
13511 
13512 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13513  DAGCombinerInfo &DCI) const {
13514  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13515 
13516  SelectionDAG &DAG = DCI.DAG;
13517  SDLoc DL(N);
13518 
13519  // Size of integers being compared has a critical role in the following
13520  // analysis, so we prefer to do this when all types are legal.
13521  if (!DCI.isAfterLegalizeDAG())
13522  return SDValue();
13523 
13524  // If all users of SETCC extend its value to a legal integer type
13525  // then we replace SETCC with a subtraction
13526  for (const SDNode *U : N->uses())
13527  if (U->getOpcode() != ISD::ZERO_EXTEND)
13528  return SDValue();
13529 
13530  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13531  auto OpSize = N->getOperand(0).getValueSizeInBits();
13532 
13534 
13535  if (OpSize < Size) {
13536  switch (CC) {
13537  default: break;
13538  case ISD::SETULT:
13539  return generateEquivalentSub(N, Size, false, false, DL, DAG);
13540  case ISD::SETULE:
13541  return generateEquivalentSub(N, Size, true, true, DL, DAG);
13542  case ISD::SETUGT:
13543  return generateEquivalentSub(N, Size, false, true, DL, DAG);
13544  case ISD::SETUGE:
13545  return generateEquivalentSub(N, Size, true, false, DL, DAG);
13546  }
13547  }
13548 
13549  return SDValue();
13550 }
13551 
// Combine an i1-producing truncation / setcc / select_cc whose inputs are a
// self-contained cluster of bit operations fed entirely by extensions of i1
// values. The whole cluster is re-typed to operate on i1 directly so the
// values stay in CR bits instead of being shuffled through GPRs.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  // Only i32/i64 source operands are handled.
  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed compare: both operands must be fully sign-extended from bit 0.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: all bits above bit 0 must be known zero; if not,
      // a SETCC may still be profitably lowered as a subtraction.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so pretend that it is known zero for both to ensure they can
      // be compared as constants.
      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
          Op1Known.getConstant() != Op2Known.getConstant())
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // For comparisons, the second operand must also be a bit op or extension.
  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // Inputs: extensions-of-i1/constants feeding the cluster.
  // BinOps: worklist of bit ops still to visit.
  // PromOps: every bit op in the cluster, to be re-typed to i1.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklist from N's operand(s); a TRUNCATE has only one relevant
  // operand, hence the early break.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
         N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (const SDNode *User : Inputs[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  // Same self-containment check for the intermediate bit operations.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (const SDNode *User : PromOps[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // HandleSDNode keeps each pending node alive across the RAUW calls below.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first promotable value operand: SELECT skips the
    // condition, SELECT_CC skips the two compared operands.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
13827 
13828 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13829  DAGCombinerInfo &DCI) const {
13830  SelectionDAG &DAG = DCI.DAG;
13831  SDLoc dl(N);
13832 
13833  // If we're tracking CR bits, we need to be careful that we don't have:
13834  // zext(binary-ops(trunc(x), trunc(y)))
13835  // or
13836  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13837  // such that we're unnecessarily moving things into CR bits that can more
13838  // efficiently stay in GPRs. Note that if we're not certain that the high
13839  // bits are set as required by the final extension, we still may need to do
13840  // some masking to get the proper behavior.
13841 
13842  // This same functionality is important on PPC64 when dealing with
13843  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13844  // the return values of functions. Because it is so similar, it is handled
13845  // here as well.
13846 
13847  if (N->getValueType(0) != MVT::i32 &&
13848  N->getValueType(0) != MVT::i64)
13849  return SDValue();
13850 
13851  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13852  (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13853  return SDValue();
13854 
13855  if (N->getOperand(0).getOpcode() != ISD::AND &&
13856  N->getOperand(0).getOpcode() != ISD::OR &&
13857  N->getOperand(0).getOpcode() != ISD::XOR &&
13858  N->getOperand(0).getOpcode() != ISD::SELECT &&
13859  N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13860  return SDValue();
13861 
13862  SmallVector<SDValue, 4> Inputs;
13863  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13864  SmallPtrSet<SDNode *, 16> Visited;
13865 
13866  // Visit all inputs, collect all binary operations (and, or, xor and
13867  // select) that are all fed by truncations.
13868  while (!BinOps.empty()) {
13869  SDValue BinOp = BinOps.pop_back_val();
13870 
13871  if (!Visited.insert(BinOp.getNode()).second)
13872  continue;
13873 
13874  PromOps.push_back(BinOp);
13875 
13876  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13877  // The condition of the select is not promoted.
13878  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13879  continue;
13880  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13881  continue;
13882 
13883  if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13884  isa<ConstantSDNode>(BinOp.getOperand(i))) {
13885  Inputs.push_back(BinOp.getOperand(i));
13886  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13887  BinOp.getOperand(i).getOpcode() == ISD::OR ||
13888  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13889  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13890  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13891  BinOps.push_back(BinOp.getOperand(i));
13892  } else {
13893  // We have an input that is not a truncation or another binary
13894  // operation; we'll abort this transformation.
13895  return SDValue();
13896  }
13897  }
13898  }
13899 
13900  // The operands of a select that must be truncated when the select is
13901  // promoted because the operand is actually part of the to-be-promoted set.
13902  DenseMap<SDNode *, EVT> SelectTruncOp[2];
13903 
13904  // Make sure that this is a self-contained cluster of operations (which
13905  // is not quite the same thing as saying that everything has only one
13906  // use).
13907  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13908  if (isa<ConstantSDNode>(Inputs[i]))
13909  continue;
13910 
13911  for (SDNode *User : Inputs[i].getNode()->uses()) {
13912  if (User != N && !Visited.count(User))
13913  return SDValue();
13914 
13915  // If we're going to promote the non-output-value operand(s) or SELECT or
13916  // SELECT_CC, record them for truncation.
13917  if (User->getOpcode() == ISD::SELECT) {
13918  if (User->getOperand(0) == Inputs[i])
13919  SelectTruncOp[0].insert(std::make_pair(User,
13920  User->getOperand(0).getValueType()));
13921  } else if (User->getOpcode() == ISD::SELECT_CC) {
13922  if (User->getOperand(0) == Inputs[i])
13923  SelectTruncOp[0].insert(std::make_pair(User,
13924  User->getOperand(0).getValueType()));
13925  if (User->getOperand(1) == Inputs[i])
13926  SelectTruncOp[1].insert(std::make_pair(User,
13927  User->getOperand(1).getValueType()));
13928  }
13929  }
13930  }
13931 
13932  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13933  for (SDNode *User : PromOps[i].getNode()->uses()) {
13934  if (User != N && !Visited.count(User))
13935  return SDValue();
13936 
13937  // If we're going to promote the non-output-value operand(s) or SELECT or
13938  // SELECT_CC, record them for truncation.
13939  if (User->getOpcode() == ISD::SELECT) {
13940  if (User->getOperand(0) == PromOps[i])
13941  SelectTruncOp[0].insert(std::make_pair(User,
13942  User->getOperand(0).getValueType()));
13943  } else if (User->getOpcode() == ISD::SELECT_CC) {
13944  if (User->getOperand(0) == PromOps[i])
13945  SelectTruncOp[0].insert(std::make_pair(User,
13946  User->getOperand(0).getValueType()));
13947  if (User->getOperand(1) == PromOps[i])
13948  SelectTruncOp[1].insert(std::make_pair(User,
13949  User->getOperand(1).getValueType()));
13950  }
13951  }
13952  }
13953 
13954  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13955  bool ReallyNeedsExt = false;
13956  if (N->getOpcode() != ISD::ANY_EXTEND) {
13957  // If all of the inputs are not already sign/zero extended, then
13958  // we'll still need to do that at the end.
13959  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13960  if (isa<ConstantSDNode>(Inputs[i]))
13961  continue;
13962 
13963  unsigned OpBits =
13964  Inputs[i].getOperand(0).getValueSizeInBits();
13965  assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13966 
13967  if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13968  !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13969  APInt::getHighBitsSet(OpBits,
13970  OpBits-PromBits))) ||
13971  (N->getOpcode() == ISD::SIGN_EXTEND &&
13972  DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13973  (OpBits-(PromBits-1)))) {
13974  ReallyNeedsExt = true;
13975  break;
13976  }
13977  }
13978  }
13979 
13980  // Replace all inputs, either with the truncation operand, or a
13981  // truncation or extension to the final output type.
13982  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13983  // Constant inputs need to be replaced with the to-be-promoted nodes that
13984  // use them because they might have users outside of the cluster of
13985  // promoted nodes.
13986  if (isa<ConstantSDNode>(Inputs[i]))
13987  continue;
13988 
13989  SDValue InSrc = Inputs[i].getOperand(0);
13990  if (Inputs[i].getValueType() == N->getValueType(0))
13991  DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13992  else if (N->getOpcode() == ISD::SIGN_EXTEND)
13993  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13994  DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13995  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13996  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13997  DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13998  else
13999  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14000  DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14001  }
14002 
14003  std::list<HandleSDNode> PromOpHandles;
14004  for (auto &PromOp : PromOps)
14005  PromOpHandles.emplace_back(PromOp);
14006 
14007  // Replace all operations (these are all the same, but have a different
14008  // (promoted) return type). DAG.getNode will validate that the types of
14009  // a binary operator match, so go through the list in reverse so that
14010  // we've likely promoted both operands first.
14011  while (!PromOpHandles.empty()) {
14012  SDValue PromOp = PromOpHandles.back().getValue();
14013  PromOpHandles.pop_back();
14014 
14015  unsigned C;
14016  switch (PromOp.getOpcode()) {
14017  default: C = 0; break;
14018  case ISD::SELECT: C = 1; break;
14019  case ISD::SELECT_CC: C = 2; break;
14020  }
14021 
14022  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14023  PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14024  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14025  PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14026  // The to-be-promoted operands of this node have not yet been
14027  // promoted (this should be rare because we're going through the
14028  // list backward, but if one of the operands has several users in
14029  // this cluster of to-be-promoted nodes, it is possible).
14030  PromOpHandles.emplace_front(PromOp);
14031  continue;
14032  }
14033 
14034  // For SELECT and SELECT_CC nodes, we do a similar check for any
14035  // to-be-promoted comparison inputs.
14036  if (PromOp.getOpcode() == ISD::SELECT ||
14037  PromOp.getOpcode() == ISD::SELECT_CC) {
14038  if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14039  PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14040  (SelectTruncOp[1].count(PromOp.getNode()) &&
14041  PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14042  PromOpHandles.emplace_front(PromOp);
14043  continue;
14044  }
14045  }
14046 
14047  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14048  PromOp.getNode()->op_end());
14049 
14050  // If this node has constant inputs, then they'll need to be promoted here.
14051  for (unsigned i = 0; i < 2; ++i) {
14052  if (!isa<ConstantSDNode>(Ops[C+i]))
14053  continue;
14054  if (Ops[C+i].getValueType() == N->getValueType(0))
14055  continue;
14056 
14057  if (N->getOpcode() == ISD::SIGN_EXTEND)
14058  Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14059  else if (N->getOpcode() == ISD::ZERO_EXTEND)
14060  Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14061  else
14062  Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14063  }
14064 
14065  // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14066  // truncate them again to the original value type.
14067  if (PromOp.getOpcode() == ISD::SELECT ||
14068  PromOp.getOpcode() == ISD::SELECT_CC) {
14069  auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14070  if (SI0 != SelectTruncOp[0].end())
14071  Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14072  auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14073  if (SI1 != SelectTruncOp[1].end())
14074  Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14075  }
14076 
14077  DAG.ReplaceAllUsesOfValueWith(PromOp,
14078  DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14079  }
14080 
14081  // Now we're left with the initial extension itself.
14082  if (!ReallyNeedsExt)
14083  return N->getOperand(0);
14084 
14085  // To zero extend, just mask off everything except for the first bit (in the
14086  // i1 case).
14087  if (N->getOpcode() == ISD::ZERO_EXTEND)
14088  return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14090  N->getValueSizeInBits(0), PromBits),
14091  dl, N->getValueType(0)));
14092 
14093  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14094  "Invalid extension type");
14095  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14096  SDValue ShiftCst =
14097  DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14098  return DAG.getNode(
14099  ISD::SRA, dl, N->getValueType(0),
14100  DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14101  ShiftCst);
14102 }
14103 
14104 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14105  DAGCombinerInfo &DCI) const {
14106  assert(N->getOpcode() == ISD::SETCC &&
14107  "Should be called with a SETCC node");
14108 
14109  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14110  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14111  SDValue LHS = N->getOperand(0);
14112  SDValue RHS = N->getOperand(1);
14113 
14114  // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14115  if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14116  LHS.hasOneUse())
14117  std::swap(LHS, RHS);
14118 
14119  // x == 0-y --> x+y == 0
14120  // x != 0-y --> x+y != 0
14121  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14122  RHS.hasOneUse()) {
14123  SDLoc DL(N);
14124  SelectionDAG &DAG = DCI.DAG;
14125  EVT VT = N->getValueType(0);
14126  EVT OpVT = LHS.getValueType();
14127  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14128  return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14129  }
14130  }
14131 
14132  return DAGCombineTruncBoolExt(N, DCI);
14133 }
14134 
14135 // Is this an extending load from an f32 to an f64?
14136 static bool isFPExtLoad(SDValue Op) {
14137  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14138  return LD->getExtensionType() == ISD::EXTLOAD &&
14139  Op.getValueType() == MVT::f64;
14140  return false;
14141 }
14142 
14143 /// Reduces the number of fp-to-int conversion when building a vector.
14144 ///
14145 /// If this vector is built out of floating to integer conversions,
14146 /// transform it to a vector built out of floating point values followed by a
14147 /// single floating to integer conversion of the vector.
14148 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14149 /// becomes (fptosi (build_vector ($A, $B, ...)))
14150 SDValue PPCTargetLowering::
14151 combineElementTruncationToVectorTruncation(SDNode *N,
14152  DAGCombinerInfo &DCI) const {
14153  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14154  "Should be called with a BUILD_VECTOR node");
14155 
14156  SelectionDAG &DAG = DCI.DAG;
14157  SDLoc dl(N);
14158 
14159  SDValue FirstInput = N->getOperand(0);
14160  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14161  "The input operand must be an fp-to-int conversion.");
14162 
14163  // This combine happens after legalization so the fp_to_[su]i nodes are
14164  // already converted to PPCSISD nodes.
14165  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14166  if (FirstConversion == PPCISD::FCTIDZ ||
14167  FirstConversion == PPCISD::FCTIDUZ ||
14168  FirstConversion == PPCISD::FCTIWZ ||
14169  FirstConversion == PPCISD::FCTIWUZ) {
14170  bool IsSplat = true;
14171  bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14172  FirstConversion == PPCISD::FCTIWUZ;
14173  EVT SrcVT = FirstInput.getOperand(0).getValueType();
14175  EVT TargetVT = N->getValueType(0);
14176  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14177  SDValue NextOp = N->getOperand(i);
14178  if (NextOp.getOpcode() != PPCISD::MFVSR)
14179  return SDValue();
14180  unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14181  if (NextConversion != FirstConversion)
14182  return SDValue();
14183  // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14184  // This is not valid if the input was originally double precision. It is
14185  // also not profitable to do unless this is an extending load in which
14186  // case doing this combine will allow us to combine consecutive loads.
14187  if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14188  return SDValue();
14189  if (N->getOperand(i) != FirstInput)
14190  IsSplat = false;
14191  }
14192 
14193  // If this is a splat, we leave it as-is since there will be only a single
14194  // fp-to-int conversion followed by a splat of the integer. This is better
14195  // for 32-bit and smaller ints and neutral for 64-bit ints.
14196  if (IsSplat)
14197  return SDValue();
14198 
14199  // Now that we know we have the right type of node, get its operands
14200  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14201  SDValue In = N->getOperand(i).getOperand(0);
14202  if (Is32Bit) {
14203  // For 32-bit values, we need to add an FP_ROUND node (if we made it
14204  // here, we know that all inputs are extending loads so this is safe).
14205  if (In.isUndef())
14206  Ops.push_back(DAG.getUNDEF(SrcVT));
14207  else {
14208  SDValue Trunc =
14209  DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14210  DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14211  Ops.push_back(Trunc);
14212  }
14213  } else
14214  Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14215  }
14216 
14217  unsigned Opcode;
14218  if (FirstConversion == PPCISD::FCTIDZ ||
14219  FirstConversion == PPCISD::FCTIWZ)
14220  Opcode = ISD::FP_TO_SINT;
14221  else
14222  Opcode = ISD::FP_TO_UINT;
14223 
14224  EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14225  SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14226  return DAG.getNode(Opcode, dl, TargetVT, BV);
14227  }
14228  return SDValue();
14229 }
14230 
14231 /// Reduce the number of loads when building a vector.
14232 ///
14233 /// Building a vector out of multiple loads can be converted to a load
14234 /// of the vector type if the loads are consecutive. If the loads are
14235 /// consecutive but in descending order, a shuffle is added at the end
14236 /// to reorder the vector.
14238  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14239  "Should be called with a BUILD_VECTOR node");
14240 
14241  SDLoc dl(N);
14242 
14243  // Return early for non byte-sized type, as they can't be consecutive.
14244  if (!N->getValueType(0).getVectorElementType().isByteSized())
14245  return SDValue();
14246 
14247  bool InputsAreConsecutiveLoads = true;
14248  bool InputsAreReverseConsecutive = true;
14249  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14250  SDValue FirstInput = N->getOperand(0);
14251  bool IsRoundOfExtLoad = false;
14252  LoadSDNode *FirstLoad = nullptr;
14253 
14254  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14255  FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14256  FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14257  IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14258  }
14259  // Not a build vector of (possibly fp_rounded) loads.
14260  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14261  N->getNumOperands() == 1)
14262  return SDValue();
14263 
14264  if (!IsRoundOfExtLoad)
14265  FirstLoad = cast<LoadSDNode>(FirstInput);
14266 
14267  SmallVector<LoadSDNode *, 4> InputLoads;
14268  InputLoads.push_back(FirstLoad);
14269  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14270  // If any inputs are fp_round(extload), they all must be.
14271  if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14272  return SDValue();
14273 
14274  SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14275  N->getOperand(i);
14276  if (NextInput.getOpcode() != ISD::LOAD)
14277  return SDValue();
14278 
14279  SDValue PreviousInput =
14280  IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14281  LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14282  LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14283 
14284  // If any inputs are fp_round(extload), they all must be.
14285  if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14286  return SDValue();
14287 
14288  // We only care about regular loads. The PPC-specific load intrinsics
14289  // will not lead to a merge opportunity.
14290  if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14291  InputsAreConsecutiveLoads = false;
14292  if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14293  InputsAreReverseConsecutive = false;
14294 
14295  // Exit early if the loads are neither consecutive nor reverse consecutive.
14296  if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14297  return SDValue();
14298  InputLoads.push_back(LD2);
14299  }
14300 
14301  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14302  "The loads cannot be both consecutive and reverse consecutive.");
14303 
14304  SDValue WideLoad;
14305  SDValue ReturnSDVal;
14306  if (InputsAreConsecutiveLoads) {
14307  assert(FirstLoad && "Input needs to be a LoadSDNode.");
14308  WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14309  FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14310  FirstLoad->getAlign());
14311  ReturnSDVal = WideLoad;
14312  } else if (InputsAreReverseConsecutive) {
14313  LoadSDNode *LastLoad = InputLoads.back();
14314  assert(LastLoad && "Input needs to be a LoadSDNode.");
14315  WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14316  LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14317  LastLoad->getAlign());
14319  for (int i = N->getNumOperands() - 1; i >= 0; i--)
14320  Ops.push_back(i);
14321 
14322  ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14323  DAG.getUNDEF(N->getValueType(0)), Ops);
14324  } else
14325  return SDValue();
14326 
14327  for (auto *LD : InputLoads)
14328  DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14329  return ReturnSDVal;
14330 }
14331 
14332 // This function adds the required vector_shuffle needed to get
14333 // the elements of the vector extract in the correct position
14334 // as specified by the CorrectElems encoding.
14336  SDValue Input, uint64_t Elems,
14337  uint64_t CorrectElems) {
14338  SDLoc dl(N);
14339 
14340  unsigned NumElems = Input.getValueType().getVectorNumElements();
14341  SmallVector<int, 16> ShuffleMask(NumElems, -1);
14342 
14343  // Knowing the element indices being extracted from the original
14344  // vector and the order in which they're being inserted, just put
14345  // them at element indices required for the instruction.
14346  for (unsigned i = 0; i < N->getNumOperands(); i++) {
14347  if (DAG.getDataLayout().isLittleEndian())
14348  ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14349  else
14350  ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14351  CorrectElems = CorrectElems >> 8;
14352  Elems = Elems >> 8;
14353  }
14354 
14355  SDValue Shuffle =
14356  DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14357  DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14358 
14359  EVT VT = N->getValueType(0);
14360  SDValue Conv = DAG.getBitcast(VT, Shuffle);
14361 
14362  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14363  Input.getValueType().getVectorElementType(),
14364  VT.getVectorNumElements());
14365  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14366  DAG.getValueType(ExtVT));
14367 }
14368 
14369 // Look for build vector patterns where input operands come from sign
14370 // extended vector_extract elements of specific indices. If the correct indices
14371 // aren't used, add a vector shuffle to fix up the indices and create
14372 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14373 // during instruction selection.
14375  // This array encodes the indices that the vector sign extend instructions
14376  // extract from when extending from one type to another for both BE and LE.
14377  // The right nibble of each byte corresponds to the LE incides.
14378  // and the left nibble of each byte corresponds to the BE incides.
14379  // For example: 0x3074B8FC byte->word
14380  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14381  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14382  // For example: 0x000070F8 byte->double word
14383  // For LE: the allowed indices are: 0x0,0x8
14384  // For BE: the allowed indices are: 0x7,0xF
14385  uint64_t TargetElems[] = {
14386  0x3074B8FC, // b->w
14387  0x000070F8, // b->d
14388  0x10325476, // h->w
14389  0x00003074, // h->d
14390  0x00001032, // w->d
14391  };
14392 
14393  uint64_t Elems = 0;
14394  int Index;
14395  SDValue Input;
14396 
14397  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14398  if (!Op)
14399  return false;
14400  if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14401  Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14402  return false;
14403 
14404  // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14405  // of the right width.
14406  SDValue Extract = Op.getOperand(0);
14407  if (Extract.getOpcode() == ISD::ANY_EXTEND)
14408  Extract = Extract.getOperand(0);
14409  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14410  return false;
14411 
14412  ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14413  if (!ExtOp)
14414  return false;
14415 
14416  Index = ExtOp->getZExtValue();
14417  if (Input && Input != Extract.getOperand(0))
14418  return false;
14419 
14420  if (!Input)
14421  Input = Extract.getOperand(0);
14422 
14423  Elems = Elems << 8;
14424  Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14425  Elems |= Index;
14426 
14427  return true;
14428  };
14429 
14430  // If the build vector operands aren't sign extended vector extracts,
14431  // of the same input vector, then return.
14432  for (unsigned i = 0; i < N->getNumOperands(); i++) {
14433  if (!isSExtOfVecExtract(N->getOperand(i))) {
14434  return SDValue();
14435  }
14436  }
14437 
14438  // If the vector extract indicies are not correct, add the appropriate
14439  // vector_shuffle.
14440  int TgtElemArrayIdx;
14441  int InputSize = Input.getValueType().getScalarSizeInBits();
14442  int OutputSize = N->getValueType(0).getScalarSizeInBits();
14443  if (InputSize + OutputSize == 40)
14444  TgtElemArrayIdx = 0;
14445  else if (InputSize + OutputSize == 72)
14446  TgtElemArrayIdx = 1;
14447  else if (InputSize + OutputSize == 48)
14448  TgtElemArrayIdx = 2;
14449  else if (InputSize + OutputSize == 80)
14450  TgtElemArrayIdx = 3;
14451  else if (InputSize + OutputSize == 96)
14452  TgtElemArrayIdx = 4;
14453  else
14454  return SDValue();
14455 
14456  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14457  CorrectElems = DAG.getDataLayout().isLittleEndian()
14458  ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14459  : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14460  if (Elems != CorrectElems) {
14461  return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14462  }
14463 
14464  // Regular lowering will catch cases where a shuffle is not needed.
14465  return SDValue();
14466 }
14467 
14468 // Look for the pattern of a load from a narrow width to i128, feeding
14469 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14470 // (LXVRZX). This node represents a zero extending load that will be matched
14471 // to the Load VSX Vector Rightmost instructions.
14473  SDLoc DL(N);
14474 
14475  // This combine is only eligible for a BUILD_VECTOR of v1i128.
14476  if (N->getValueType(0) != MVT::v1i128)
14477  return SDValue();
14478 
14479  SDValue Operand = N->getOperand(0);
14480  // Proceed with the transformation if the operand to the BUILD_VECTOR
14481  // is a load instruction.
14482  if (Operand.getOpcode() != ISD::LOAD)
14483  return SDValue();
14484 
14485  auto *LD = cast<LoadSDNode>(Operand);
14486  EVT MemoryType = LD->getMemoryVT();
14487 
14488  // This transformation is only valid if the we are loading either a byte,
14489  // halfword, word, or doubleword.
14490  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14492 
14493  // Ensure that the load from the narrow width is being zero extended to i128.
14494  if (!ValidLDType ||
14495  (LD->getExtensionType() != ISD::ZEXTLOAD &&
14496  LD->getExtensionType() != ISD::EXTLOAD))
14497  return SDValue();
14498 
14499  SDValue LoadOps[] = {
14500  LD->getChain(), LD->getBasePtr(),
14501  DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14502 
14505  LoadOps, MemoryType, LD->getMemOperand());
14506 }
14507 
14508 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14509  DAGCombinerInfo &DCI) const {
14510  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14511  "Should be called with a BUILD_VECTOR node");
14512 
14513  SelectionDAG &DAG = DCI.DAG;
14514  SDLoc dl(N);
14515 
14516  if (!Subtarget.hasVSX())
14517  return SDValue();
14518 
14519  // The target independent DAG combiner will leave a build_vector of
14520  // float-to-int conversions intact. We can generate MUCH better code for
14521  // a float-to-int conversion of a vector of floats.
14522  SDValue FirstInput = N->getOperand(0);
14523  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14524  SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14525  if (Reduced)
14526  return Reduced;
14527  }
14528 
14529  // If we're building a vector out of consecutive loads, just load that
14530  // vector type.
14531  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14532  if (Reduced)
14533  return Reduced;
14534 
14535  // If we're building a vector out of extended elements from another vector
14536  // we have P9 vector integer extend instructions. The code assumes legal
14537  // input types (i.e. it can't handle things like v4i16) so do not run before
14538  // legalization.
14539  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14540  Reduced = combineBVOfVecSExt(N, DAG);
14541  if (Reduced)
14542  return Reduced;
14543  }
14544 
14545  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14546  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14547  // is a load from <valid narrow width> to i128.
14548  if (Subtarget.isISA3_1()) {
14549  SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14550  if (BVOfZLoad)
14551  return BVOfZLoad;
14552  }
14553 
14554  if (N->getValueType(0) != MVT::v2f64)
14555  return SDValue();
14556 
14557  // Looking for:
14558  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14559  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14560  FirstInput.getOpcode() != ISD::UINT_TO_FP)
14561  return SDValue();
14562  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14563  N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14564  return SDValue();
14565  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14566  return SDValue();
14567 
14568  SDValue Ext1 = FirstInput.getOperand(0);
14569  SDValue Ext2 = N->getOperand(1).getOperand(0);
14570  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14572  return SDValue();
14573 
14574  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14575  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14576  if (!Ext1Op || !Ext2Op)
14577  return SDValue();
14578  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14579  Ext1.getOperand(0) != Ext2.getOperand(0))
14580  return SDValue();
14581 
14582  int FirstElem = Ext1Op->getZExtValue();
14583  int SecondElem = Ext2Op->getZExtValue();
14584  int SubvecIdx;
14585  if (FirstElem == 0 && SecondElem == 1)
14586  SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14587  else if (FirstElem == 2 && SecondElem == 3)
14588  SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14589  else
14590  return SDValue();
14591 
14592  SDValue SrcVec = Ext1.getOperand(0);
14593  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14595  return DAG.getNode(NodeType, dl, MVT::v2f64,
14596  SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14597 }
14598 
14599 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14600  DAGCombinerInfo &DCI) const {
14601  assert((N->getOpcode() == ISD::SINT_TO_FP ||
14602  N->getOpcode() == ISD::UINT_TO_FP) &&
14603  "Need an int -> FP conversion node here");
14604 
14605  if (useSoftFloat() || !Subtarget.has64BitSupport())
14606  return SDValue();
14607 
14608  SelectionDAG &DAG = DCI.DAG;
14609  SDLoc dl(N);
14610  SDValue Op(N, 0);
14611 
14612  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14613  // from the hardware.
14614  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14615  return SDValue();
14616  if (!Op.getOperand(0).getValueType().isSimple())
14617  return SDValue();
14618  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14619  Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14620  return SDValue();
14621 
14622  SDValue FirstOperand(Op.getOperand(0));
14623  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14624  (FirstOperand.getValueType() == MVT::i8 ||
14625  FirstOperand.getValueType() == MVT::i16);
14626  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14627  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14628  bool DstDouble = Op.getValueType() == MVT::f64;
14629  unsigned ConvOp = Signed ?
14630  (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14631  (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14632  SDValue WidthConst =
14633  DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14634  dl, false);
14635  LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14636  SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14639  Ops, MVT::i8, LDN->getMemOperand());
14640 
14641  // For signed conversion, we need to sign-extend the value in the VSR
14642  if (Signed) {
14643  SDValue ExtOps[] = { Ld, WidthConst };
14644  SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14645  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14646  } else
14647  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14648  }
14649 
14650 
14651  // For i32 intermediate values, unfortunately, the conversion functions
14652  // leave the upper 32 bits of the value are undefined. Within the set of
14653  // scalar instructions, we have no method for zero- or sign-extending the
14654  // value. Thus, we cannot handle i32 intermediate values here.
14655  if (Op.getOperand(0).getValueType() == MVT::i32)
14656  return SDValue();
14657 
14658  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14659  "UINT_TO_FP is supported only with FPCVT");
14660 
14661  // If we have FCFIDS, then use it when converting to single-precision.
14662  // Otherwise, convert to double-precision and then round.
14663  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14664  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14665  : PPCISD::FCFIDS)
14666  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14667  : PPCISD::FCFID);
14668  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14669  ? MVT::f32
14670  : MVT::f64;
14671 
14672  // If we're converting from a float, to an int, and back to a float again,
14673  // then we don't need the store/load pair at all.
14674  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14675  Subtarget.hasFPCVT()) ||
14676  (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14677  SDValue Src = Op.getOperand(0).getOperand(0);
14678  if (Src.getValueType() == MVT::f32) {
14679  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14680  DCI.AddToWorklist(Src.getNode());
14681  } else if (Src.getValueType() != MVT::f64) {
14682  // Make sure that we don't pick up a ppc_fp128 source value.
14683  return SDValue();
14684  }
14685 
14686  unsigned FCTOp =
14687  Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14689 
14690  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14691  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14692 
14693  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14694  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
14695  DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
14696  DCI.AddToWorklist(FP.getNode());
14697  }
14698 
14699  return FP;
14700  }
14701 
14702  return SDValue();
14703 }
14704 
14705 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14706 // builtins) into loads with swaps.
14708  DAGCombinerInfo &DCI) const {
14709  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
14710  // load combines.
14711  if (DCI.isBeforeLegalizeOps())
14712  return SDValue();
14713 
14714  SelectionDAG &DAG = DCI.DAG;
14715  SDLoc dl(N);
14716  SDValue Chain;
14717  SDValue Base;
14718  MachineMemOperand *MMO;
14719 
14720  switch (N->getOpcode()) {
14721  default:
14722  llvm_unreachable("Unexpected opcode for little endian VSX load");
14723  case ISD::LOAD: {
14724  LoadSDNode *LD = cast<LoadSDNode>(N);
14725  Chain = LD->getChain();
14726  Base = LD->getBasePtr();
14727  MMO = LD->getMemOperand();
14728  // If the MMO suggests this isn't a load of a full vector, leave
14729  // things alone. For a built-in, we have to make the change for
14730  // correctness, so if there is a size problem that will be a bug.
14731  if (MMO->getSize() < 16)
14732  return SDValue();
14733  break;
14734  }
14735  case ISD::INTRINSIC_W_CHAIN: {
14736  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14737  Chain = Intrin->getChain();
14738  // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14739  // us what we want. Get operand 2 instead.
14740  Base = Intrin->getOperand(2);
14741  MMO = Intrin->getMemOperand();
14742  break;
14743  }
14744  }
14745 
14746  MVT VecTy = N->getValueType(0).getSimpleVT();
14747 
14748  SDValue LoadOps[] = { Chain, Base };
14751  LoadOps, MVT::v2f64, MMO);
14752 
14753  DCI.AddToWorklist(Load.getNode());
14754  Chain = Load.getValue(1);
14755  SDValue Swap = DAG.getNode(
14756  PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14757  DCI.AddToWorklist(Swap.getNode());
14758 
14759  // Add a bitcast if the resulting load type doesn't match v2f64.
14760  if (VecTy != MVT::v2f64) {
14761  SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14762  DCI.AddToWorklist(N.getNode());
14763  // Package {bitcast value, swap's chain} to match Load's shape.
14764  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14765  N, Swap.getValue(1));
14766  }
14767 
14768  return Swap;
14769 }
14770 
14771 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14772 // builtins) into stores with swaps.
14774  DAGCombinerInfo &DCI) const {
14775  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
14776  // store combines.
14777  if (DCI.isBeforeLegalizeOps())
14778  return SDValue();
14779 
14780  SelectionDAG &DAG = DCI.DAG;
14781  SDLoc dl(N);
14782  SDValue Chain;
14783  SDValue Base;
14784  unsigned SrcOpnd;
14785  MachineMemOperand *MMO;
14786 
14787  switch (N->getOpcode()) {
14788  default:
14789  llvm_unreachable("Unexpected opcode for little endian VSX store");
14790  case ISD::STORE: {
14791  StoreSDNode *ST = cast<StoreSDNode>(N);
14792  Chain = ST->getChain();
14793  Base = ST->getBasePtr();
14794  MMO = ST->getMemOperand();
14795  SrcOpnd = 1;
14796  // If the MMO suggests this isn't a store of a full vector, leave
14797  // things alone. For a built-in, we have to make the change for
14798  // correctness, so if there is a size problem that will be a bug.
14799  if (MMO->getSize() < 16)
14800  return SDValue();
14801  break;
14802  }
14803  case ISD::INTRINSIC_VOID: {
14804  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14805  Chain = Intrin->getChain();
14806  // Intrin->getBasePtr() oddly does not get what we want.
14807  Base = Intrin->getOperand(3);
14808  MMO = Intrin->getMemOperand();
14809  SrcOpnd = 2;
14810  break;
14811  }
14812  }
14813 
14814  SDValue Src = N->getOperand(SrcOpnd);
14815  MVT VecTy = Src.getValueType().getSimpleVT();
14816 
14817  // All stores are done as v2f64 and possible bit cast.
14818  if (VecTy != MVT::v2f64) {
14819  Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14820  DCI.AddToWorklist(Src.getNode());
14821  }
14822 
14823  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14824  DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14825  DCI.AddToWorklist(Swap.getNode());
14826  Chain = Swap.getValue(1);
14827  SDValue StoreOps[] = { Chain, Swap, Base };
14829  DAG.getVTList(MVT::Other),
14830  StoreOps, VecTy, MMO);
14831  DCI.AddToWorklist(Store.getNode());
14832  return Store;
14833 }
14834 
14835 // Handle DAG combine for STORE (FP_TO_INT F).
14836 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14837  DAGCombinerInfo &DCI) const {
14838 
14839  SelectionDAG &DAG = DCI.DAG;
14840  SDLoc dl(N);
14841  unsigned Opcode = N->getOperand(1).getOpcode();
14842 
14843  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14844  && "Not a FP_TO_INT Instruction!");
14845 
14846  SDValue Val = N->getOperand(1).getOperand(0);
14847  EVT Op1VT = N->getOperand(1).getValueType();
14848  EVT ResVT = Val.getValueType();
14849 
14850  if (!isTypeLegal(ResVT))
14851  return SDValue();
14852 
14853  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14854  bool ValidTypeForStoreFltAsInt =
14855  (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14856  (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14857 
14858  if (ResVT == MVT::f128 && !Subtarget.hasP9Vector())
14859  return SDValue();
14860 
14861  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14862  cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14863  return SDValue();
14864 
14865  // Extend f32 values to f64
14866  if (ResVT.getScalarSizeInBits() == 32) {
14867  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14868  DCI.AddToWorklist(Val.getNode());
14869  }
14870 
14871  // Set signed or unsigned conversion opcode.
14872  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14875 
14876  Val = DAG.getNode(ConvOpcode,
14877  dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14878  DCI.AddToWorklist(Val.getNode());
14879 
14880  // Set number of bytes being converted.
14881  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14882  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14883  DAG.getIntPtrConstant(ByteSize, dl, false),
14884  DAG.getValueType(Op1VT) };
14885 
14887  DAG.getVTList(MVT::Other), Ops,
14888  cast<StoreSDNode>(N)->getMemoryVT(),
14889  cast<StoreSDNode>(N)->getMemOperand());
14890 
14891  DCI.AddToWorklist(Val.getNode());
14892  return Val;
14893 }
14894 
14895 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14896  // Check that the source of the element keeps flipping
14897  // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
14898  bool PrevElemFromFirstVec = Mask[0] < NumElts;
14899  for (int i = 1, e = Mask.size(); i < e; i++) {
14900  if (PrevElemFromFirstVec && Mask[i] < NumElts)
14901  return false;
14902  if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14903  return false;
14904  PrevElemFromFirstVec = !PrevElemFromFirstVec;
14905  }
14906  return true;
14907 }
14908 
14909 static bool isSplatBV(SDValue Op) {
14910  if (Op.getOpcode() != ISD::BUILD_VECTOR)
14911  return false;
14912  SDValue FirstOp;
14913 
14914  // Find first non-undef input.
14915  for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14916  FirstOp = Op.getOperand(i);
14917  if (!FirstOp.isUndef())
14918  break;
14919  }
14920 
14921  // All inputs are undef or the same as the first non-undef input.
14922  for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14923  if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14924  return false;
14925  return true;
14926 }
14927 
14929  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14930  return Op;
14931  if (Op.getOpcode() != ISD::BITCAST)
14932  return SDValue();
14933  Op = Op.getOperand(0);
14934  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14935  return Op;
14936  return SDValue();
14937 }
14938 
14939 // Fix up the shuffle mask to account for the fact that the result of
14940 // scalar_to_vector is not in lane zero. This just takes all values in
14941 // the ranges specified by the min/max indices and adds the number of
14942 // elements required to ensure each element comes from the respective
14943 // position in the valid lane.
14944 // On little endian, that's just the corresponding element in the other
14945 // half of the vector. On big endian, it is in the same half but right
14946 // justified rather than left justified in that half.
14948  int LHSMaxIdx, int RHSMinIdx,
14949  int RHSMaxIdx, int HalfVec,
14950  unsigned ValidLaneWidth,
14951  const PPCSubtarget &Subtarget) {
14952  for (int i = 0, e = ShuffV.size(); i < e; i++) {
14953  int Idx = ShuffV[i];
14954  if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14955  ShuffV[i] +=
14956  Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
14957  }
14958 }
14959 
14960 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14961 // the original is:
14962 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14963 // In such a case, just change the shuffle mask to extract the element
14964 // from the permuted index.
14966  const PPCSubtarget &Subtarget) {
14967  SDLoc dl(OrigSToV);
14968  EVT VT = OrigSToV.getValueType();
14969  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14970  "Expecting a SCALAR_TO_VECTOR here");
14971  SDValue Input = OrigSToV.getOperand(0);
14972 
14973  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14974  ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14975  SDValue OrigVector = Input.getOperand(0);
14976 
14977  // Can't handle non-const element indices or different vector types
14978  // for the input to the extract and the output of the scalar_to_vector.
14979  if (Idx && VT == OrigVector.getValueType()) {
14980  unsigned NumElts = VT.getVectorNumElements();
14981  assert(
14982  NumElts > 1 &&
14983  "Cannot produce a permuted scalar_to_vector for one element vector");
14984  SmallVector<int, 16> NewMask(NumElts, -1);
14985  unsigned ResultInElt = NumElts / 2;
14986  ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
14987  NewMask[ResultInElt] = Idx->getZExtValue();
14988  return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14989  }
14990  }
14991  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14992  OrigSToV.getOperand(0));
14993 }
14994 
14995 // On little endian subtargets, combine shuffles such as:
14996 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14997 // into:
14998 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14999 // because the latter can be matched to a single instruction merge.
15000 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15001 // to put the value into element zero. Adjust the shuffle mask so that the
15002 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
15003 // On big endian targets, this is still useful for SCALAR_TO_VECTOR
15004 // nodes with elements smaller than doubleword because all the ways
15005 // of getting scalar data into a vector register put the value in the
15006 // rightmost element of the left half of the vector.
15007 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15008  SelectionDAG &DAG) const {
15009  SDValue LHS = SVN->getOperand(0);
15010  SDValue RHS = SVN->getOperand(1);
15011  auto Mask = SVN->getMask();
15012  int NumElts = LHS.getValueType().getVectorNumElements();
15013  SDValue Res(SVN, 0);
15014  SDLoc dl(SVN);
15015  bool IsLittleEndian = Subtarget.isLittleEndian();
15016 
15017  // On big endian targets this is only useful for subtargets with direct moves.
15018  // On little endian targets it would be useful for all subtargets with VSX.
15019  // However adding special handling for LE subtargets without direct moves
15020  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15021  // which includes direct moves.
15022  if (!Subtarget.hasDirectMove())
15023  return Res;
15024 
15025  // If this is not a shuffle of a shuffle and the first element comes from
15026  // the second vector, canonicalize to the commuted form. This will make it
15027  // more likely to match one of the single instruction patterns.
15028  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15029  RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15030  std::swap(LHS, RHS);
15031  Res = DAG.getCommutedVectorShuffle(*SVN);
15032  Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15033  }
15034 
15035  // Adjust the shuffle mask if either input vector comes from a
15036  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15037  // form (to prevent the need for a swap).
15038  SmallVector<int, 16> ShuffV(Mask);
15039  SDValue SToVLHS = isScalarToVec(LHS);
15040  SDValue SToVRHS = isScalarToVec(RHS);
15041  if (SToVLHS || SToVRHS) {
15042  // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15043  // same type and have differing element sizes, then do not perform
15044  // the following transformation. The current transformation for
15045  // SCALAR_TO_VECTOR assumes that both input vectors have the same
15046  // element size. This will be updated in the future to account for
15047  // differing sizes of the LHS and RHS.
15048  if (SToVLHS && SToVRHS &&
15049  (SToVLHS.getValueType().getScalarSizeInBits() !=
15050  SToVRHS.getValueType().getScalarSizeInBits()))
15051  return Res;
15052 
15053  int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15054  : SToVRHS.getValueType().getVectorNumElements();
15055  int NumEltsOut = ShuffV.size();
15056  // The width of the "valid lane" (i.e. the lane that contains the value that
15057  // is vectorized) needs to be expressed in terms of the number of elements
15058  // of the shuffle. It is thereby the ratio of the values before and after
15059  // any bitcast.
15060  unsigned ValidLaneWidth =
15061  SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15062  LHS.getValueType().getScalarSizeInBits()
15063  : SToVRHS.getValueType().getScalarSizeInBits() /
15064  RHS.getValueType().getScalarSizeInBits();
15065 
15066  // Initially assume that neither input is permuted. These will be adjusted
15067  // accordingly if either input is.
15068  int LHSMaxIdx = -1;
15069  int RHSMinIdx = -1;
15070  int RHSMaxIdx = -1;
15071  int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15072 
15073  // Get the permuted scalar to vector nodes for the source(s) that come from
15074  // ISD::SCALAR_TO_VECTOR.
15075  // On big endian systems, this only makes sense for element sizes smaller
15076  // than 64 bits since for 64-bit elements, all instructions already put
15077  // the value into element zero. Since scalar size of LHS and RHS may differ
15078  // after isScalarToVec, this should be checked using their own sizes.
15079  if (SToVLHS) {
15080  if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15081  return Res;
15082  // Set up the values for the shuffle vector fixup.
15083  LHSMaxIdx = NumEltsOut / NumEltsIn;
15084  SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15085  if (SToVLHS.getValueType() != LHS.getValueType())
15086  SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15087  LHS = SToVLHS;
15088  }
15089  if (SToVRHS) {
15090  if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15091  return Res;
15092  RHSMinIdx = NumEltsOut;
15093  RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15094  SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15095  if (SToVRHS.getValueType() != RHS.getValueType())
15096  SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15097  RHS = SToVRHS;
15098  }
15099 
15100  // Fix up the shuffle mask to reflect where the desired element actually is.
15101  // The minimum and maximum indices that correspond to element zero for both
15102  // the LHS and RHS are computed and will control which shuffle mask entries
15103  // are to be changed. For example, if the RHS is permuted, any shuffle mask
15104  // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15105  fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15106  HalfVec, ValidLaneWidth, Subtarget);
15107  Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15108 
15109  // We may have simplified away the shuffle. We won't be able to do anything
15110  // further with it here.
15111  if (!isa<ShuffleVectorSDNode>(Res))
15112  return Res;
15113  Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15114  }
15115 
15116  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15117  // The common case after we commuted the shuffle is that the RHS is a splat
15118  // and we have elements coming in from the splat at indices that are not
15119  // conducive to using a merge.
15120  // Example:
15121  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15122  if (!isSplatBV(TheSplat))
15123  return Res;
15124 
15125  // We are looking for a mask such that all even elements are from
15126  // one vector and all odd elements from the other.
15127  if (!isAlternatingShuffMask(Mask, NumElts))
15128  return Res;
15129 
15130  // Adjust the mask so we are pulling in the same index from the splat
15131  // as the index from the interesting vector in consecutive elements.
15132  if (IsLittleEndian) {
15133  // Example (even elements from first vector):
15134  // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15135  if (Mask[0] < NumElts)
15136  for (int i = 1, e = Mask.size(); i < e; i += 2) {
15137  if (ShuffV[i] < 0)
15138  continue;
15139  ShuffV[i] = (ShuffV[i - 1] + NumElts);
15140  }
15141  // Example (odd elements from first vector):
15142  // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15143  else
15144  for (int i = 0, e = Mask.size(); i < e; i += 2) {
15145  if (ShuffV[i] < 0)
15146  continue;
15147  ShuffV[i] = (ShuffV[i + 1] + NumElts);
15148  }
15149  } else {
15150  // Example (even elements from first vector):
15151  // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15152  if (Mask[0] < NumElts)
15153  for (int i = 0, e = Mask.size(); i < e; i += 2) {
15154  if (ShuffV[i] < 0)
15155  continue;
15156  ShuffV[i] = ShuffV[i + 1] - NumElts;
15157  }
15158  // Example (odd elements from first vector):
15159  // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15160  else
15161  for (int i = 1, e = Mask.size(); i < e; i += 2) {
15162  if (ShuffV[i] < 0)
15163  continue;
15164  ShuffV[i] = ShuffV[i - 1] - NumElts;
15165  }
15166  }
15167 
15168  // If the RHS has undefs, we need to remove them since we may have created
15169  // a shuffle that adds those instead of the splat value.
15170  SDValue SplatVal =
15171  cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15172  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15173 
15174  if (IsLittleEndian)
15175  RHS = TheSplat;
15176  else
15177  LHS = TheSplat;
15178  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15179 }
15180 
15181 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15182  LSBaseSDNode *LSBase,
15183  DAGCombinerInfo &DCI) const {
15184  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15185  "Not a reverse memop pattern!");
15186 
15187  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15188  auto Mask = SVN->getMask();
15189  int i = 0;
15190  auto I = Mask.rbegin();
15191  auto E = Mask.rend();
15192 
15193  for (; I != E; ++I) {
15194  if (*I != i)
15195  return false;
15196  i++;
15197  }
15198  return true;
15199  };
15200 
15201  SelectionDAG &DAG = DCI.DAG;
15202  EVT VT = SVN->getValueType(0);
15203 
15204  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15205  return SDValue();
15206 
15207  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
15208  // See comment in PPCVSXSwapRemoval.cpp.
15209  // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
15210  if (!Subtarget.hasP9Vector())
15211  return SDValue();
15212 
15213  if(!IsElementReverse(SVN))
15214  return SDValue();
15215 
15216  if (LSBase->getOpcode() == ISD::LOAD) {
15217  // If the load return value 0 has more than one user except the
15218  // shufflevector instruction, it is not profitable to replace the
15219  // shufflevector with a reverse load.
15220  for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15221  UI != UE; ++UI)
15222  if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15223  return SDValue();
15224 
15225  SDLoc dl(LSBase);
15226  SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15227  return DAG.getMemIntrinsicNode(
15229  LSBase->getMemoryVT(), LSBase->getMemOperand());
15230  }
15231 
15232  if (LSBase->getOpcode() == ISD::STORE) {
15233  // If there are other uses of the shuffle, the swap cannot be avoided.
15234  // Forcing the use of an X-Form (since swapped stores only have
15235  // X-Forms) without removing the swap is unprofitable.
15236  if (!SVN->hasOneUse())
15237  return SDValue();
15238 
15239  SDLoc dl(LSBase);
15240  SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15241  LSBase->getBasePtr()};
15242  return DAG.getMemIntrinsicNode(
15243  PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15244  LSBase->getMemoryVT(), LSBase->getMemOperand());
15245  }
15246 
15247  llvm_unreachable("Expected a load or store node here");
15248 }
15249 
15250 static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15251  unsigned IntrinsicID =
15252  cast<ConstantSDNode>(Intrin.getOperand(1))->getZExtValue();
15253  if (IntrinsicID == Intrinsic::ppc_stdcx)
15254  StoreWidth = 8;
15255  else if (IntrinsicID == Intrinsic::ppc_stwcx)
15256  StoreWidth = 4;
15257  else if (IntrinsicID == Intrinsic::ppc_sthcx)
15258  StoreWidth = 2;
15259  else if (IntrinsicID == Intrinsic::ppc_stbcx)
15260  StoreWidth = 1;
15261  else
15262  return false;
15263  return true;
15264 }
15265 
15267  DAGCombinerInfo &DCI) const {
15268  SelectionDAG &DAG = DCI.DAG;
15269  SDLoc dl(N);
15270  switch (N->getOpcode()) {
15271  default: break;
15272  case ISD::ADD:
15273  return combineADD(N, DCI);
15274  case ISD::SHL:
15275  return combineSHL(N, DCI);
15276  case ISD::SRA:
15277  return combineSRA(N, DCI);
15278  case ISD::SRL:
15279  return combineSRL(N, DCI);
15280  case ISD::MUL:
15281  return combineMUL(N, DCI);
15282  case ISD::FMA:
15283  case PPCISD::FNMSUB:
15284  return combineFMALike(N, DCI);
15285  case PPCISD::SHL:
15286  if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15287  return N->getOperand(0);
15288  break;
15289  case PPCISD::SRL:
15290  if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15291  return N->getOperand(0);
15292  break;
15293  case PPCISD::SRA:
15294  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15295  if (C->isZero() || // 0 >>s V -> 0.
15296  C->isAllOnes()) // -1 >>s V -> -1.
15297  return N->getOperand(0);
15298  }
15299  break;
15300  case ISD::SIGN_EXTEND:
15301  case ISD::ZERO_EXTEND:
15302  case ISD::ANY_EXTEND:
15303  return DAGCombineExtBoolTrunc(N, DCI);
15304  case ISD::TRUNCATE:
15305  return combineTRUNCATE(N, DCI);
15306  case ISD::SETCC:
15307  if (SDValue CSCC = combineSetCC(N, DCI))
15308  return CSCC;
15309  [[fallthrough]];
15310  case ISD::SELECT_CC:
15311  return DAGCombineTruncBoolExt(N, DCI);
15312  case ISD::SINT_TO_FP:
15313  case ISD::UINT_TO_FP:
15314  return combineFPToIntToFP(N, DCI);
15315  case ISD::VECTOR_SHUFFLE:
15316  if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15317  LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15318  return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15319  }
15320  return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15321  case ISD::STORE: {
15322 
15323  EVT Op1VT = N->getOperand(1).getValueType();
15324  unsigned Opcode = N->getOperand(1).getOpcode();
15325 
15326  if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
15327  SDValue Val= combineStoreFPToInt(N, DCI);
15328  if (Val)
15329  return Val;
15330  }
15331 
15332  if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15333  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15334  SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15335  if (Val)
15336  return Val;
15337  }
15338 
15339  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15340  if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15341  N->getOperand(1).getNode()->hasOneUse() &&
15342  (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15343  (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15344 
15345  // STBRX can only handle simple types and it makes no sense to store less
15346  // two bytes in byte-reversed order.
15347  EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15348  if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15349  break;
15350 
15351  SDValue BSwapOp = N->getOperand(1).getOperand(0);
15352  // Do an any-extend to 32-bits if this is a half-word input.
15353  if (BSwapOp.getValueType() == MVT::i16)
15354  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15355 
15356  // If the type of BSWAP operand is wider than stored memory width
15357  // it need to be shifted to the right side before STBRX.
15358  if (Op1VT.bitsGT(mVT)) {
15359  int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15360  BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15361  DAG.getConstant(Shift, dl, MVT::i32));
15362  // Need to truncate if this is a bswap of i64 stored as i32/i16.
15363  if (Op1VT == MVT::i64)
15364  BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15365  }
15366 
15367  SDValue Ops[] = {
15368  N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15369  };
15370  return
15372  Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15373  cast<StoreSDNode>(N)->getMemOperand());
15374  }
15375 
15376  // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15377  // So it can increase the chance of CSE constant construction.
15378  if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15379  isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15380  // Need to sign-extended to 64-bits to handle negative values.
15381  EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15382  uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15383  MemVT.getSizeInBits());
15384  SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15385 
15386  // DAG.getTruncStore() can't be used here because it doesn't accept
15387  // the general (base + offset) addressing mode.
15388  // So we use UpdateNodeOperands and setTruncatingStore instead.
15389  DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15390  N->getOperand(3));
15391  cast<StoreSDNode>(N)->setTruncatingStore(true);
15392  return SDValue(N, 0);
15393  }
15394 
15395  // For little endian, VSX stores require generating xxswapd/lxvd2x.
15396  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15397  if (Op1VT.isSimple()) {
15398  MVT StoreVT = Op1VT.getSimpleVT();
15399  if (Subtarget.needsSwapsForVSXMemOps() &&
15400  (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15401  StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15402  return expandVSXStoreForLE(N, DCI);
15403  }
15404  break;
15405  }
15406  case ISD::LOAD: {
15407  LoadSDNode *LD = cast<LoadSDNode>(N);
15408  EVT VT = LD->getValueType(0);
15409 
15410  // For little endian, VSX loads require generating lxvd2x/xxswapd.
15411  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15412  if (VT.isSimple()) {
15413  MVT LoadVT = VT.getSimpleVT();
15414  if (Subtarget.needsSwapsForVSXMemOps() &&
15415  (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15416  LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15417  return expandVSXLoadForLE(N, DCI);
15418  }
15419 
15420  // We sometimes end up with a 64-bit integer load, from which we extract
15421  // two single-precision floating-point numbers. This happens with
15422  // std::complex<float>, and other similar structures, because of the way we
15423  // canonicalize structure copies. However, if we lack direct moves,
15424  // then the final bitcasts from the extracted integer values to the
15425  // floating-point numbers turn into store/load pairs. Even with direct moves,
15426  // just loading the two floating-point numbers is likely better.
15427  auto ReplaceTwoFloatLoad = [&]() {
15428  if (VT != MVT::i64)
15429  return false;
15430 
15431  if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15432  LD->isVolatile())
15433  return false;
15434 
15435  // We're looking for a sequence like this:
15436  // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15437  // t16: i64 = srl t13, Constant:i32<32>
15438  // t17: i32 = truncate t16
15439  // t18: f32 = bitcast t17
15440  // t19: i32 = truncate t13
15441  // t20: f32 = bitcast t19
15442 
15443  if (!LD->hasNUsesOfValue(2, 0))
15444  return false;
15445 
15446  auto UI = LD->use_begin();
15447  while (UI.getUse().getResNo() != 0) ++UI;
15448  SDNode *Trunc = *UI++;
15449  while (UI.getUse().getResNo() != 0) ++UI;
15450  SDNode *RightShift = *UI;
15451  if (Trunc->getOpcode() != ISD::TRUNCATE)
15452  std::swap(Trunc, RightShift);
15453 
15454  if (Trunc->getOpcode() != ISD::TRUNCATE ||
15455  Trunc->getValueType(0) != MVT::i32 ||
15456  !Trunc->hasOneUse())
15457  return false;
15458  if (RightShift->getOpcode() != ISD::SRL ||
15459  !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15460  RightShift->getConstantOperandVal(1) != 32 ||
15461  !RightShift->hasOneUse())
15462  return false;
15463 
15464  SDNode *Trunc2 = *RightShift->use_begin();
15465  if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15466  Trunc2->getValueType(0) != MVT::i32 ||
15467  !Trunc2->hasOneUse())
15468  return false;
15469 
15470  SDNode *Bitcast = *Trunc->use_begin();
15471  SDNode *Bitcast2 = *Trunc2->use_begin();
15472 
15473  if (Bitcast->getOpcode() != ISD::BITCAST ||
15474  Bitcast->getValueType(0) != MVT::f32)
15475  return false;
15476  if (Bitcast2->getOpcode() != ISD::BITCAST ||
15477  Bitcast2->getValueType(0) != MVT::f32)
15478  return false;
15479 
15480  if (Subtarget.isLittleEndian())
15481  std::swap(Bitcast, Bitcast2);
15482 
15483  // Bitcast has the second float (in memory-layout order) and Bitcast2
15484  // has the first one.
15485 
15486  SDValue BasePtr = LD->getBasePtr();
15487  if (LD->isIndexed()) {
15488  assert(LD->getAddressingMode() == ISD::PRE_INC &&
15489  "Non-pre-inc AM on PPC?");
15490  BasePtr =
15491  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15492  LD->getOffset());
15493  }
15494 
15495  auto MMOFlags =
15496  LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15497  SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15498  LD->getPointerInfo(), LD->getAlign(),
15499  MMOFlags, LD->getAAInfo());
15500  SDValue AddPtr =
15501  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15502  BasePtr, DAG.getIntPtrConstant(4, dl));
15503  SDValue FloatLoad2 = DAG.getLoad(
15504  MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15505  LD->getPointerInfo().getWithOffset(4),
15506  commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
15507 
15508  if (LD->isIndexed()) {
15509  // Note that DAGCombine should re-form any pre-increment load(s) from
15510  // what is produced here if that makes sense.
15511  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15512  }
15513 
15514  DCI.CombineTo(Bitcast2, FloatLoad);
15515  DCI.CombineTo(Bitcast, FloatLoad2);
15516 
15517  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15518  SDValue(FloatLoad2.getNode(), 1));
15519  return true;
15520  };
15521 
15522  if (ReplaceTwoFloatLoad())
15523  return SDValue(N, 0);
15524 
15525  EVT MemVT = LD->getMemoryVT();
15526  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15527  Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15528  if (LD->isUnindexed() && VT.isVector() &&
15529  ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15530  // P8 and later hardware should just use LOAD.
15531  !Subtarget.hasP8Vector() &&
15532  (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15533  VT == MVT::v4f32))) &&
15534  LD->getAlign() < ABIAlignment) {
15535  // This is a type-legal unaligned Altivec load.
15536  SDValue Chain = LD->getChain();
15537  SDValue Ptr = LD->getBasePtr();
15538  bool isLittleEndian = Subtarget.isLittleEndian();
15539 
15540  // This implements the loading of unaligned vectors as described in
15541  // the venerable Apple Velocity Engine overview. Specifically:
15542  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15543  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15544  //
15545  // The general idea is to expand a sequence of one or more unaligned
15546  // loads into an alignment-based permutation-control instruction (lvsl
15547  // or lvsr), a series of regular vector loads (which always truncate
15548  // their input address to an aligned address), and a series of
15549  // permutations. The results of these permutations are the requested
15550  // loaded values. The trick is that the last "extra" load is not taken
15551  // from the address you might suspect (sizeof(vector) bytes after the
15552  // last requested load), but rather sizeof(vector) - 1 bytes after the
15553  // last requested vector. The point of this is to avoid a page fault if
15554  // the base address happened to be aligned. This works because if the
15555  // base address is aligned, then adding less than a full vector length
15556  // will cause the last vector in the sequence to be (re)loaded.
15557  // Otherwise, the next vector will be fetched as you might suspect was
15558  // necessary.
15559 
15560  // We might be able to reuse the permutation generation from
15561  // a different base address offset from this one by an aligned amount.
15562  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15563  // optimization later.
15564  Intrinsic::ID Intr, IntrLD, IntrPerm;
15565  MVT PermCntlTy, PermTy, LDTy;
15566  Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15567  : Intrinsic::ppc_altivec_lvsl;
15568  IntrLD = Intrinsic::ppc_altivec_lvx;
15569  IntrPerm = Intrinsic::ppc_altivec_vperm;
15570  PermCntlTy = MVT::v16i8;
15571  PermTy = MVT::v4i32;
15572  LDTy = MVT::v4i32;
15573 
15574  SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15575 
15576  // Create the new MMO for the new base load. It is like the original MMO,
15577  // but represents an area in memory almost twice the vector size centered
15578  // on the original address. If the address is unaligned, we might start
15579  // reading up to (sizeof(vector)-1) bytes below the address of the
15580  // original unaligned load.
15581  MachineFunction &MF = DAG.getMachineFunction();
15582  MachineMemOperand *BaseMMO =
15583  MF.getMachineMemOperand(LD->getMemOperand(),
15584  -(int64_t)MemVT.getStoreSize()+1,
15585  2*MemVT.getStoreSize()-1);
15586 
15587  // Create the new base load.
15588  SDValue LDXIntID =
15589  DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15590  SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15591  SDValue BaseLoad =
15593  DAG.getVTList(PermTy, MVT::Other),
15594  BaseLoadOps, LDTy, BaseMMO);
15595 
15596  // Note that the value of IncOffset (which is provided to the next
15597  // load's pointer info offset value, and thus used to calculate the
15598  // alignment), and the value of IncValue (which is actually used to
15599  // increment the pointer value) are different! This is because we
15600  // require the next load to appear to be aligned, even though it
15601  // is actually offset from the base pointer by a lesser amount.
15602  int IncOffset = VT.getSizeInBits() / 8;
15603  int IncValue = IncOffset;
15604 
15605  // Walk (both up and down) the chain looking for another load at the real
15606  // (aligned) offset (the alignment of the other load does not matter in
15607  // this case). If found, then do not use the offset reduction trick, as
15608  // that will prevent the loads from being later combined (as they would
15609  // otherwise be duplicates).
15610  if (!findConsecutiveLoad(LD, DAG))
15611  --IncValue;
15612 
15613  SDValue Increment =
15614  DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15615  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15616 
15617  MachineMemOperand *ExtraMMO =
15618  MF.getMachineMemOperand(LD->getMemOperand(),
15619  1, 2*MemVT.getStoreSize()-1);
15620  SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15621  SDValue ExtraLoad =
15623  DAG.getVTList(PermTy, MVT::Other),
15624  ExtraLoadOps, LDTy, ExtraMMO);
15625 
15627  BaseLoad.getValue(1), ExtraLoad.getValue(1));
15628 
15629  // Because vperm has a big-endian bias, we must reverse the order
15630  // of the input vectors and complement the permute control vector
15631  // when generating little endian code. We have already handled the
15632  // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15633  // and ExtraLoad here.
15634  SDValue Perm;
15635  if (isLittleEndian)
15636  Perm = BuildIntrinsicOp(IntrPerm,
15637  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15638  else
15639  Perm = BuildIntrinsicOp(IntrPerm,
15640  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15641 
15642  if (VT != PermTy)
15643  Perm = Subtarget.hasAltivec()
15644  ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15645  : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15646  DAG.getTargetConstant(1, dl, MVT::i64));
15647  // second argument is 1 because this rounding
15648  // is always exact.
15649 
15650  // The output of the permutation is our loaded result, the TokenFactor is
15651  // our new chain.
15652  DCI.CombineTo(N, Perm, TF);
15653  return SDValue(N, 0);
15654  }
15655  }
15656  break;
15657  case ISD::INTRINSIC_WO_CHAIN: {
15658  bool isLittleEndian = Subtarget.isLittleEndian();
15659  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15660  Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15661  : Intrinsic::ppc_altivec_lvsl);
15662  if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15663  SDValue Add = N->getOperand(1);
15664 
15665  int Bits = 4 /* 16 byte alignment */;
15666 
15667  if (DAG.MaskedValueIsZero(Add->getOperand(1),
15668  APInt::getAllOnes(Bits /* alignment */)
15669  .zext(Add.getScalarValueSizeInBits()))) {
15670  SDNode *BasePtr = Add->getOperand(0).getNode();
15671  for (SDNode *U : BasePtr->uses()) {
15672  if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15673  cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15674  // We've found another LVSL/LVSR, and this address is an aligned
15675  // multiple of that one. The results will be the same, so use the
15676  // one we've just found instead.
15677 
15678  return SDValue(U, 0);
15679  }
15680  }
15681  }
15682 
15683  if (isa<ConstantSDNode>(Add->getOperand(1))) {
15684  SDNode *BasePtr = Add->getOperand(0).getNode();
15685  for (SDNode *U : BasePtr->uses()) {
15686  if (U->getOpcode() == ISD::ADD &&
15687  isa<ConstantSDNode>(U->getOperand(1)) &&
15688  (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15689  cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15690  (1ULL << Bits) ==
15691  0) {
15692  SDNode *OtherAdd = U;
15693  for (SDNode *V : OtherAdd->uses()) {
15694  if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15695  cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15696  IID) {
15697  return SDValue(V, 0);
15698  }
15699  }
15700  }
15701  }
15702  }
15703  }
15704 
15705  // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15706  // Expose the vabsduw/h/b opportunity for down stream
15707  if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15708  (IID == Intrinsic::ppc_altivec_vmaxsw ||
15709  IID == Intrinsic::ppc_altivec_vmaxsh ||
15710  IID == Intrinsic::ppc_altivec_vmaxsb)) {
15711  SDValue V1 = N->getOperand(1);
15712  SDValue V2 = N->getOperand(2);
15713  if ((V1.getSimpleValueType() == MVT::v4i32 ||
15714  V1.getSimpleValueType() == MVT::v8i16 ||
15715  V1.getSimpleValueType() == MVT::v16i8) &&
15716  V1.getSimpleValueType() == V2.getSimpleValueType()) {
15717  // (0-a, a)
15718  if (V1.getOpcode() == ISD::SUB &&
15720  V1.getOperand(1) == V2) {
15721  return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15722  }
15723  // (a, 0-a)
15724  if (V2.getOpcode() == ISD::SUB &&
15725  ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15726  V2.getOperand(1) == V1) {
15727  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15728  }
15729  // (x-y, y-x)
15730  if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15731  V1.getOperand(0) == V2.getOperand(1) &&
15732  V1.getOperand(1) == V2.getOperand(0)) {
15733  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15734  }
15735  }
15736  }
15737  }
15738 
15739  break;
15741  // For little endian, VSX loads require generating lxvd2x/xxswapd.
15742  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15743  if (Subtarget.needsSwapsForVSXMemOps()) {
15744  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15745  default:
15746  break;
15747  case Intrinsic::ppc_vsx_lxvw4x:
15748  case Intrinsic::ppc_vsx_lxvd2x:
15749  return expandVSXLoadForLE(N, DCI);
15750  }
15751  }
15752  break;
15753  case ISD::INTRINSIC_VOID:
15754  // For little endian, VSX stores require generating xxswapd/stxvd2x.
15755  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15756  if (Subtarget.needsSwapsForVSXMemOps()) {
15757  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15758  default:
15759  break;
15760  case Intrinsic::ppc_vsx_stxvw4x:
15761  case Intrinsic::ppc_vsx_stxvd2x:
15762  return expandVSXStoreForLE(N, DCI);
15763  }
15764  }
15765  break;
15766  case ISD::BSWAP: {
15767  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15768  // For subtargets without LDBRX, we can still do better than the default
15769  // expansion even for 64-bit BSWAP (LOAD).
15770  bool Is64BitBswapOn64BitTgt =
15771  Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
15772  bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
15773  N->getOperand(0).hasOneUse();
15774  if (IsSingleUseNormalLd &&
15775  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15776  (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15777  SDValue Load = N->getOperand(0);
15778  LoadSDNode *LD = cast<LoadSDNode>(Load);
15779  // Create the byte-swapping load.
15780  SDValue Ops[] = {
15781  LD->getChain(), // Chain
15782  LD->getBasePtr(), // Ptr
15783  DAG.getValueType(N->getValueType(0)) // VT
15784  };
15785  SDValue BSLoad =
15787  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15789  Ops, LD->getMemoryVT(), LD->getMemOperand());
15790 
15791  // If this is an i16 load, insert the truncate.
15792  SDValue ResVal = BSLoad;
15793  if (N->getValueType(0) == MVT::i16)
15794  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15795 
15796  // First, combine the bswap away. This makes the value produced by the
15797  // load dead.
15798  DCI.CombineTo(N, ResVal);
15799 
15800  // Next, combine the load away, we give it a bogus result value but a real
15801  // chain result. The result value is dead because the bswap is dead.
15802  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15803 
15804  // Return N so it doesn't get rechecked!
15805  return SDValue(N, 0);
15806  }
15807  // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
15808  // before legalization so that the BUILD_PAIR is handled correctly.
15809  if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
15810  !IsSingleUseNormalLd)
15811  return SDValue();
15812  LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
15813 
15814  // Can't split volatile or atomic loads.
15815  if (!LD->isSimple())
15816  return SDValue();
15817  SDValue BasePtr = LD->getBasePtr();
15818  SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
15819  LD->getPointerInfo(), LD->getAlign());
15820  Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
15821  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15822  DAG.getIntPtrConstant(4, dl));
15824  LD->getMemOperand(), 4, 4);
15825  SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
15826  Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
15827  SDValue Res;
15828  if (Subtarget.isLittleEndian())
15829  Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
15830  else
15831  Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
15832  SDValue TF =
15834  Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
15835  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
15836  return Res;
15837  }
15838  case PPCISD::VCMP:
15839  // If a VCMP_rec node already exists with exactly the same operands as this
15840  // node, use its result instead of this node (VCMP_rec computes both a CR6
15841  // and a normal output).
15842  //
15843  if (!N->getOperand(0).hasOneUse() &&
15844  !N->getOperand(1).hasOneUse() &&
15845  !N->getOperand(2).hasOneUse()) {
15846 
15847  // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15848  SDNode *VCMPrecNode = nullptr;
15849 
15850  SDNode *LHSN = N->getOperand(0).getNode();
15851  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15852  UI != E; ++UI)
15853  if (UI->getOpcode() == PPCISD::VCMP_rec &&
15854  UI->getOperand(1) == N->getOperand(1) &&
15855  UI->getOperand(2) == N->getOperand(2) &&
15856  UI->getOperand(0) == N->getOperand(0)) {
15857  VCMPrecNode = *UI;
15858  break;
15859  }
15860 
15861  // If there is no VCMP_rec node, or if the flag value has a single use,
15862  // don't transform this.
15863  if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15864  break;
15865 
15866  // Look at the (necessarily single) use of the flag value. If it has a
15867  // chain, this transformation is more complex. Note that multiple things
15868  // could use the value result, which we should ignore.
15869  SDNode *FlagUser = nullptr;
15870  for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15871  FlagUser == nullptr; ++UI) {
15872  assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15873  SDNode *User = *UI;
15874  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15875  if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15876  FlagUser = User;
15877  break;
15878  }
15879  }
15880  }
15881 
15882  // If the user is a MFOCRF instruction, we know this is safe.
15883  // Otherwise we give up for right now.
15884  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15885  return SDValue(VCMPrecNode, 0);
15886  }
15887  break;
15888  case ISD::BR_CC: {
15889  // If this is a branch on an altivec predicate comparison, lower this so
15890  // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15891  // lowering is done pre-legalize, because the legalizer lowers the predicate
15892  // compare down to code that is difficult to reassemble.
15893  // This code also handles branches that depend on the result of a store
15894  // conditional.
15895  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15896  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15897 
15898  int CompareOpc;
15899  bool isDot;
15900 
15901  if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
15902  break;
15903 
15904  // Since we are doing this pre-legalize, the RHS can be a constant of
15905  // arbitrary bitwidth which may cause issues when trying to get the value
15906  // from the underlying APInt.
15907  auto RHSAPInt = cast<ConstantSDNode>(RHS)->getAPIntValue();
15908  if (!RHSAPInt.isIntN(64))
15909  break;
15910 
15911  unsigned Val = RHSAPInt.getZExtValue();
15912  auto isImpossibleCompare = [&]() {
15913  // If this is a comparison against something other than 0/1, then we know
15914  // that the condition is never/always true.
15915  if (Val != 0 && Val != 1) {
15916  if (CC == ISD::SETEQ) // Cond never true, remove branch.
15917  return N->getOperand(0);
15918  // Always !=, turn it into an unconditional branch.
15919  return DAG.getNode(ISD::BR, dl, MVT::Other,
15920  N->getOperand(0), N->getOperand(4));
15921  }
15922  return SDValue();
15923  };
15924  // Combine branches fed by store conditional instructions (st[bhwd]cx).
15925  unsigned StoreWidth = 0;
15926  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15927  isStoreConditional(LHS, StoreWidth)) {
15928  if (SDValue Impossible = isImpossibleCompare())
15929  return Impossible;
15930  PPC::Predicate CompOpc;
15931  // eq 0 => ne
15932  // ne 0 => eq
15933  // eq 1 => eq
15934  // ne 1 => ne
15935  if (Val == 0)
15936  CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
15937  else
15938  CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
15939 
15940  SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
15941  DAG.getConstant(StoreWidth, dl, MVT::i32)};
15942  auto *MemNode = cast<MemSDNode>(LHS);
15943  SDValue ConstSt = DAG.getMemIntrinsicNode(
15944  PPCISD::STORE_COND, dl,
15945  DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
15946  MemNode->getMemoryVT(), MemNode->getMemOperand());
15947 
15948  SDValue InChain;
15949  // Unchain the branch from the original store conditional.
15950  if (N->getOperand(0) == LHS.getValue(1))
15951  InChain = LHS.getOperand(0);
15952  else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
15953  SmallVector<SDValue, 4> InChains;
15954  SDValue InTF = N->getOperand(0);
15955  for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
15956  if (InTF.getOperand(i) != LHS.getValue(1))
15957  InChains.push_back(InTF.getOperand(i));
15958  InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
15959  }
15960 
15961  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
15962  DAG.getConstant(CompOpc, dl, MVT::i32),
15963  DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
15964  ConstSt.getValue(2));
15965  }
15966 
15967  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15968  getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15969  assert(isDot && "Can't compare against a vector result!");
15970 
15971  if (SDValue Impossible = isImpossibleCompare())
15972  return Impossible;
15973 
15974  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15975  // Create the PPCISD altivec 'dot' comparison node.
15976  SDValue Ops[] = {
15977  LHS.getOperand(2), // LHS of compare
15978  LHS.getOperand(3), // RHS of compare
15979  DAG.getConstant(CompareOpc, dl, MVT::i32)
15980  };
15981  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15982  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15983 
15984  // Unpack the result based on how the target uses it.
15985  PPC::Predicate CompOpc;
15986  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15987  default: // Can't happen, don't crash on invalid number though.
15988  case 0: // Branch on the value of the EQ bit of CR6.
15989  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15990  break;
15991  case 1: // Branch on the inverted value of the EQ bit of CR6.
15992  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15993  break;
15994  case 2: // Branch on the value of the LT bit of CR6.
15995  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15996  break;
15997  case 3: // Branch on the inverted value of the LT bit of CR6.
15998  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15999  break;
16000  }
16001 
16002  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16003  DAG.getConstant(CompOpc, dl, MVT::i32),
16004  DAG.getRegister(PPC::CR6, MVT::i32),
16005  N->getOperand(4), CompNode.getValue(1));
16006  }
16007  break;
16008  }
16009  case ISD::BUILD_VECTOR:
16010  return DAGCombineBuildVector(N, DCI);
16011  case ISD::ABS:
16012  return combineABS(N, DCI);
16013  case ISD::VSELECT:
16014  return combineVSelect(N, DCI);
16015  }
16016 
16017  return SDValue();
16018 }
16019 
16020 SDValue
16022  SelectionDAG &DAG,
16023  SmallVectorImpl<SDNode *> &Created) const {
16024  // fold (sdiv X, pow2)
16025  EVT VT = N->getValueType(0);
16026  if (VT == MVT::i64 && !Subtarget.isPPC64())
16027  return SDValue();
16028  if ((VT != MVT::i32 && VT != MVT::i64) ||
16029  !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16030  return SDValue();
16031 
16032  SDLoc DL(N);
16033  SDValue N0 = N->getOperand(0);
16034 
16035  bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16036  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
16037  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16038 
16039  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16040  Created.push_back(Op.getNode());
16041 
16042  if (IsNegPow2) {
16043  Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16044  Created.push_back(Op.getNode());
16045  }
16046 
16047  return Op;
16048 }
16049 
16050 //===----------------------------------------------------------------------===//
16051 // Inline Assembly Support
16052 //===----------------------------------------------------------------------===//
16053 
16055  KnownBits &Known,
16056  const APInt &DemandedElts,
16057  const SelectionDAG &DAG,
16058  unsigned Depth) const {
16059  Known.resetAll();
16060  switch (Op.getOpcode()) {
16061  default: break;
16062  case PPCISD::LBRX: {
16063  // lhbrx is known to have the top bits cleared out.
16064  if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16065  Known.Zero = 0xFFFF0000;
16066  break;
16067  }
16068  case ISD::INTRINSIC_WO_CHAIN: {
16069  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
16070  default: break;
16071  case Intrinsic::ppc_altivec_vcmpbfp_p:
16072  case Intrinsic::ppc_altivec_vcmpeqfp_p:
16073  case Intrinsic::ppc_altivec_vcmpequb_p:
16074  case Intrinsic::ppc_altivec_vcmpequh_p:
16075  case Intrinsic::ppc_altivec_vcmpequw_p:
16076  case Intrinsic::ppc_altivec_vcmpequd_p:
16077  case Intrinsic::ppc_altivec_vcmpequq_p:
16078  case Intrinsic::ppc_altivec_vcmpgefp_p:
16079  case Intrinsic::ppc_altivec_vcmpgtfp_p:
16080  case Intrinsic::ppc_altivec_vcmpgtsb_p:
16081  case Intrinsic::ppc_altivec_vcmpgtsh_p:
16082  case Intrinsic::ppc_altivec_vcmpgtsw_p:
16083  case Intrinsic::ppc_altivec_vcmpgtsd_p:
16084  case Intrinsic::ppc_altivec_vcmpgtsq_p:
16085  case Intrinsic::ppc_altivec_vcmpgtub_p:
16086  case Intrinsic::ppc_altivec_vcmpgtuh_p:
16087  case Intrinsic::ppc_altivec_vcmpgtuw_p:
16088  case Intrinsic::ppc_altivec_vcmpgtud_p:
16089  case Intrinsic::ppc_altivec_vcmpgtuq_p:
16090  Known.Zero = ~1U; // All bits but the low one are known to be zero.
16091  break;
16092  }
16093  break;
16094  }
16095  case ISD::INTRINSIC_W_CHAIN: {
16096  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
16097  default:
16098  break;
16099  case Intrinsic::ppc_load2r:
16100  // Top bits are cleared for load2r (which is the same as lhbrx).
16101  Known.Zero = 0xFFFF0000;
16102  break;
16103  }
16104  break;
16105  }
16106  }
16107 }
16108 
16110  switch (Subtarget.getCPUDirective()) {
16111  default: break;
16112  case PPC::DIR_970:
16113  case PPC::DIR_PWR4:
16114  case PPC::DIR_PWR5:
16115  case PPC::DIR_PWR5X:
16116  case PPC::DIR_PWR6:
16117  case PPC::DIR_PWR6X:
16118  case PPC::DIR_PWR7:
16119  case PPC::DIR_PWR8:
16120  case PPC::DIR_PWR9:
16121  case PPC::DIR_PWR10:
16122  case PPC::DIR_PWR_FUTURE: {
16123  if (!ML)
16124  break;
16125 
16127  // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
16128  // so that we can decrease cache misses and branch-prediction misses.
16129  // Actual alignment of the loop will depend on the hotness check and other
16130  // logic in alignBlocks.
16131  if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16132  return Align(32);
16133  }
16134 
16135  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16136 
16137  // For small loops (between 5 and 8 instructions), align to a 32-byte
16138  // boundary so that the entire loop fits in one instruction-cache line.
16139  uint64_t LoopSize = 0;
16140  for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16141  for (const MachineInstr &J : **I) {
16142  LoopSize += TII->getInstSizeInBytes(J);
16143  if (LoopSize > 32)
16144  break;
16145  }
16146 
16147  if (LoopSize > 16 && LoopSize <= 32)
16148  return Align(32);
16149 
16150  break;
16151  }
16152  }
16153 
16155 }
16156 
16157 /// getConstraintType - Given a constraint, return the type of
16158 /// constraint it is for this target.
16161  if (Constraint.size() == 1) {
16162  switch (Constraint[0]) {
16163  default: break;
16164  case 'b':
16165  case 'r':
16166  case 'f':
16167  case 'd':
16168  case 'v':
16169  case 'y':
16170  return C_RegisterClass;
16171  case 'Z':
16172  // FIXME: While Z does indicate a memory constraint, it specifically
16173  // indicates an r+r address (used in conjunction with the 'y' modifier
16174  // in the replacement string). Currently, we're forcing the base
16175  // register to be r0 in the asm printer (which is interpreted as zero)
16176  // and forming the complete address in the second register. This is
16177  // suboptimal.
16178  return C_Memory;
16179  }
16180  } else if (Constraint == "wc") { // individual CR bits.
16181  return C_RegisterClass;
16182  } else if (Constraint == "wa" || Constraint == "wd" ||
16183  Constraint == "wf" || Constraint == "ws" ||
16184  Constraint == "wi" || Constraint == "ww") {
16185  return C_RegisterClass; // VSX registers.
16186  }
16187  return TargetLowering::getConstraintType(Constraint);
16188 }
16189 
16190 /// Examine constraint type and operand type and determine a weight value.
16191 /// This object must already have been set up with the operand type
16192 /// and the current alternative constraint selected.
16195  AsmOperandInfo &info, const char *constraint) const {
16196  ConstraintWeight weight = CW_Invalid;
16197  Value *CallOperandVal = info.CallOperandVal;
16198  // If we don't have a value, we can't do a match,
16199  // but allow it at the lowest weight.
16200  if (!CallOperandVal)
16201  return CW_Default;
16202  Type *type = CallOperandVal->getType();
16203 
16204  // Look at the constraint type.
16205  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16206  return CW_Register; // an individual CR bit.
16207  else if ((StringRef(constraint) == "wa" ||
16208  StringRef(constraint) == "wd" ||
16209  StringRef(constraint) == "wf") &&
16210  type->isVectorTy())
16211  return CW_Register;
16212  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16213  return CW_Register; // just hold 64-bit integers data.
16214  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16215  return CW_Register;
16216  else if (StringRef(constraint) == "ww" && type->isFloatTy())
16217  return CW_Register;
16218 
16219  switch (*constraint) {
16220  default:
16222  break;
16223  case 'b':
16224  if (type->isIntegerTy())
16225  weight = CW_Register;
16226  break;
16227  case 'f':
16228  if (type->isFloatTy())
16229  weight = CW_Register;
16230  break;
16231  case 'd':
16232  if (type->isDoubleTy())
16233  weight = CW_Register;
16234  break;
16235  case 'v':
16236  if (type->isVectorTy())
16237  weight = CW_Register;
16238  break;
16239  case 'y':
16240  weight = CW_Register;
16241  break;
16242  case 'Z':
16243  weight = CW_Memory;
16244  break;
16245  }
16246  return weight;
16247 }
16248 
16249 std::pair<unsigned, const TargetRegisterClass *>
16251  StringRef Constraint,
16252  MVT VT) const {
16253  if (Constraint.size() == 1) {
16254  // GCC RS6000 Constraint Letters
16255  switch (Constraint[0]) {
16256  case 'b': // R1-R31
16257  if (VT == MVT::i64 && Subtarget.isPPC64())
16258  return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16259  return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16260  case 'r': // R0-R31
16261  if (VT == MVT::i64 && Subtarget.isPPC64())
16262  return std::make_pair(0U, &PPC::G8RCRegClass);
16263  return std::make_pair(0U, &PPC::GPRCRegClass);
16264  // 'd' and 'f' constraints are both defined to be "the floating point
16265  // registers", where one is for 32-bit and the other for 64-bit. We don't
16266  // really care overly much here so just give them all the same reg classes.
16267  case 'd':
16268  case 'f':
16269  if (Subtarget.hasSPE()) {
16270  if (VT == MVT::f32 || VT == MVT::i32)
16271  return std::make_pair(0U, &PPC::GPRCRegClass);
16272  if (VT == MVT::f64 || VT == MVT::i64)
16273  return std::make_pair(0U, &PPC::SPERCRegClass);
16274  } else {
16275  if (VT == MVT::f32 || VT == MVT::i32)
16276  return std::make_pair(0U, &PPC::F4RCRegClass);
16277  if (VT == MVT::f64 || VT == MVT::i64)
16278  return std::make_pair(0U, &PPC::F8RCRegClass);
16279  }
16280  break;
16281  case 'v':
16282  if (Subtarget.hasAltivec() && VT.isVector())
16283  return std::make_pair(0U, &PPC::VRRCRegClass);
16284  else if (Subtarget.hasVSX())
16285  // Scalars in Altivec registers only make sense with VSX.
16286  return std::make_pair(0U, &PPC::VFRCRegClass);
16287  break;
16288  case 'y': // crrc
16289  return std::make_pair(0U, &PPC::CRRCRegClass);
16290  }
16291  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16292  // An individual CR bit.
16293  return std::make_pair(0U, &PPC::CRBITRCRegClass);
16294  } else if ((Constraint == "wa" || Constraint == "wd" ||
16295  Constraint == "wf" || Constraint == "wi") &&
16296  Subtarget.hasVSX()) {
16297  // A VSX register for either a scalar (FP) or vector. There is no
16298  // support for single precision scalars on subtargets prior to Power8.
16299  if (VT.isVector())
16300  return std::make_pair(0U, &PPC::VSRCRegClass);
16301  if (VT == MVT::f32 && Subtarget.hasP8Vector())
16302  return std::make_pair(0U, &PPC::VSSRCRegClass);
16303  return std::make_pair(0U, &PPC::VSFRCRegClass);
16304  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16305  if (VT == MVT::f32 && Subtarget.hasP8Vector())
16306  return std::make_pair(0U, &PPC::VSSRCRegClass);
16307  else
16308  return std::make_pair(0U, &PPC::VSFRCRegClass);
16309  } else if (Constraint == "lr") {
16310  if (VT == MVT::i64)
16311  return std::make_pair(0U, &PPC::LR8RCRegClass);
16312  else
16313  return std::make_pair(0U, &PPC::LRRCRegClass);
16314  }
16315 
16316  // Handle special cases of physical registers that are not properly handled
16317  // by the base class.
16318  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16319  // If we name a VSX register, we can't defer to the base class because it
16320  // will not recognize the correct register (their names will be VSL{0-31}
16321  // and V{0-31} so they won't match). So we match them here.
16322  if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16323  int VSNum = atoi(Constraint.data() + 3);
16324  assert(VSNum >= 0 && VSNum <= 63 &&
16325  "Attempted to access a vsr out of range");
16326  if (VSNum < 32)
16327  return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16328  return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16329  }
16330 
16331  // For float registers, we can't defer to the base class as it will match
16332  // the SPILLTOVSRRC class.
16333  if (Constraint.size() > 3 && Constraint[1] == 'f') {
16334  int RegNum = atoi(Constraint.data() + 2);
16335  if (RegNum > 31 || RegNum < 0)
16336  report_fatal_error("Invalid floating point register number");
16337  if (VT == MVT::f32 || VT == MVT::i32)
16338  return Subtarget.hasSPE()
16339  ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16340  : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16341  if (VT == MVT::f64 || VT == MVT::i64)
16342  return Subtarget.hasSPE()
16343  ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16344  : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16345  }
16346  }
16347 
16348  std::pair<unsigned, const TargetRegisterClass *> R =
16350 
16351  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16352  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16353  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16354  // register.
16355  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16356  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16357  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16358  PPC::GPRCRegClass.contains(R.first))
16359  return std::make_pair(TRI->getMatchingSuperReg(R.first,
16360  PPC::sub_32, &PPC::G8RCRegClass),
16361  &PPC::G8RCRegClass);
16362 
16363  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16364  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16365  R.first = PPC::CR0;
16366  R.second = &PPC::CRRCRegClass;
16367  }
16368  // FIXME: This warning should ideally be emitted in the front end.
16369  const auto &TM = getTargetMachine();
16370  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16371  if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16372  (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16373  (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16374  errs() << "warning: vector registers 20 to 32 are reserved in the "
16375  "default AIX AltiVec ABI and cannot be used\n";
16376  }
16377 
16378  return R;
16379 }
16380 
16381 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16382 /// vector. If it is invalid, don't add anything to Ops.
// Lower a PPC single-letter immediate inline-asm constraint ('I'..'P') to a
// target constant operand when the value matches the letter's range; any
// other constraint (or non-matching value) falls through to the base class.
// NOTE(review): the line carrying the function name for this signature was
// lost in extraction; the parameter lines below continue it verbatim.
16384  std::string &Constraint,
16385  std::vector<SDValue>&Ops,
16386  SelectionDAG &DAG) const {
16387  SDValue Result;
16388 
16389  // Only support length 1 constraints.
16390  if (Constraint.length() > 1) return;
16391 
16392  char Letter = Constraint[0];
16393  switch (Letter) {
16394  default: break;
16395  case 'I':
16396  case 'J':
16397  case 'K':
16398  case 'L':
16399  case 'M':
16400  case 'N':
16401  case 'O':
16402  case 'P': {
16403  ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16404  if (!CST) return; // Must be an immediate to match.
16405  SDLoc dl(Op);
16406  int64_t Value = CST->getSExtValue();
16407  EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16408  // numbers are printed as such.
// Each case only sets Result when the immediate satisfies the constraint;
// otherwise Result stays null and nothing is appended below.
16409  switch (Letter) {
16410  default: llvm_unreachable("Unknown constraint letter!");
16411  case 'I': // "I" is a signed 16-bit constant.
16412  if (isInt<16>(Value))
16413  Result = DAG.getTargetConstant(Value, dl, TCVT);
16414  break;
16415  case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16416  if (isShiftedUInt<16, 16>(Value))
16417  Result = DAG.getTargetConstant(Value, dl, TCVT);
16418  break;
16419  case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16420  if (isShiftedInt<16, 16>(Value))
16421  Result = DAG.getTargetConstant(Value, dl, TCVT);
16422  break;
16423  case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16424  if (isUInt<16>(Value))
16425  Result = DAG.getTargetConstant(Value, dl, TCVT);
16426  break;
16427  case 'M': // "M" is a constant that is greater than 31.
16428  if (Value > 31)
16429  Result = DAG.getTargetConstant(Value, dl, TCVT);
16430  break;
16431  case 'N': // "N" is a positive constant that is an exact power of two.
16432  if (Value > 0 && isPowerOf2_64(Value))
16433  Result = DAG.getTargetConstant(Value, dl, TCVT);
16434  break;
16435  case 'O': // "O" is the constant zero.
16436  if (Value == 0)
16437  Result = DAG.getTargetConstant(Value, dl, TCVT);
16438  break;
16439  case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16440  if (isInt<16>(-Value))
16441  Result = DAG.getTargetConstant(Value, dl, TCVT);
16442  break;
16443  }
16444  break;
16445  }
16446  }
16447 
16448  if (Result.getNode()) {
16449  Ops.push_back(Result);
16450  return;
16451  }
16452 
16453  // Handle standard constraint letters.
16454  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16455 }
16456 
// For the PPC trap intrinsics (tdw/tw/trapd/trap), propagate any "annotation"
// metadata attached to the call instruction as an extra MDNode operand so it
// survives into the SelectionDAG.
// NOTE(review): the first signature line(s) were lost in extraction; the
// parameter line below continues them verbatim.
16459  SelectionDAG &DAG) const {
16460  if (I.getNumOperands() <= 1)
16461  return;
16462  if (!isa<ConstantSDNode>(Ops[1].getNode()))
16463  return;
// Operand 1 of the intrinsic node is the intrinsic ID constant.
16464  auto IntrinsicID = cast<ConstantSDNode>(Ops[1].getNode())->getZExtValue();
16465  if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
16466  IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
16467  return;
16468 
16469  if (I.hasMetadata("annotation")) {
16470  MDNode *MDN = I.getMetadata("annotation");
16471  Ops.push_back(DAG.getMDNode(MDN));
16472  }
16473 }
16474 
16475 // isLegalAddressingMode - Return true if the addressing mode represented
16476 // by AM is legal for this target, for a load/store of the specified type.
// Return true if the addressing mode (BaseGV + BaseReg + Scale*Reg + BaseOffs)
// is natively supported for a load/store of type Ty. PPC supports r, r+i
// (signed 16-bit) and r+r forms only.
// NOTE(review): the first signature line was lost in extraction; the
// parameter lines below continue it verbatim.
16478  const AddrMode &AM, Type *Ty,
16479  unsigned AS,
16480  Instruction *I) const {
16481  // Vector type r+i form is supported since power9 as DQ form. We don't check
16482  // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
16483  // imm form is preferred and the offset can be adjusted to use imm form later
16484  // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
16485  // max offset to check legal addressing mode, we should be a little aggressive
16486  // to contain other offsets for that LSRUse.
16487  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
16488  return false;
16489 
16490  // PPC allows a sign-extended 16-bit immediate field.
16491  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
16492  return false;
16493 
16494  // No global is ever allowed as a base.
16495  if (AM.BaseGV)
16496  return false;
16497 
16498  // PPC only support r+r,
16499  switch (AM.Scale) {
16500  case 0: // "r+i" or just "i", depending on HasBaseReg.
16501  break;
16502  case 1:
16503  if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
16504  return false;
16505  // Otherwise we have r+r or r+i.
16506  break;
16507  case 2:
16508  if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
16509  return false;
16510  // Allow 2*r as r+r.
16511  break;
16512  default:
16513  // No other scales are supported.
16514  return false;
16515  }
16516 
16517  return true;
16518 }
16519 
// Lower ISD::RETURNADDR: produce the return address for the frame 'Depth'
// levels up. Depth 0 reads the LR save slot of the current frame; deeper
// requests walk frame pointers and load LR at the ABI return-save offset.
16520 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
16521  SelectionDAG &DAG) const {
16522  MachineFunction &MF = DAG.getMachineFunction();
16523  MachineFrameInfo &MFI = MF.getFrameInfo();
16524  MFI.setReturnAddressIsTaken(true);
16525 
// NOTE(review): the condition guarding this early return (validation of the
// depth operand) was lost in extraction — confirm against upstream.
16527  return SDValue();
16528 
16529  SDLoc dl(Op);
16530  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16531 
16532  // Make sure the function does not optimize away the store of the RA to
16533  // the stack.
16534  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
16535  FuncInfo->setLRStoreRequired();
16536  bool isPPC64 = Subtarget.isPPC64();
16537  auto PtrVT = getPointerTy(MF.getDataLayout());
16538 
16539  if (Depth > 0) {
16540  // The link register (return address) is saved in the caller's frame
16541  // not the callee's stack frame. So we must get the caller's frame
16542  // address and load the return address at the LR offset from there.
16543  SDValue FrameAddr =
16544  DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16545  LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
16546  SDValue Offset =
16547  DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
16548  isPPC64 ? MVT::i64 : MVT::i32);
16549  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
16550  DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
16551  MachinePointerInfo());
16552  }
16553 
16554  // Just load the return address off the stack.
16555  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
16556  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
16557  MachinePointerInfo());
16558 }
16559 
16560 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
16561  SelectionDAG &DAG) const {
16562  SDLoc dl(Op);
16563  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16564 
16565  MachineFunction &MF = DAG.getMachineFunction();
16566  MachineFrameInfo &MFI = MF.getFrameInfo();
16567  MFI.setFrameAddressIsTaken(true);
16568 
16569  EVT PtrVT = getPointerTy(MF.getDataLayout());
16570  bool isPPC64 = PtrVT == MVT::i64;
16571 
16572  // Naked functions never have a frame pointer, and so we use r1. For all
16573  // other functions, this decision must be delayed until during PEI.
16574  unsigned FrameReg;
16575  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16576  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16577  else
16578  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16579 
16580  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16581  PtrVT);
16582  while (Depth--)
16583  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16584  FrameAddr, MachinePointerInfo());
16585  return FrameAddr;
16586 }
16587 
16588 // FIXME? Maybe this could be a TableGen attribute on some registers and
16589 // this table could be generated automatically from RegInfo.
// Resolve a named-register global variable ("r1", "r2", "r13") to a physical
// register, validating that the requested LLT matches the subtarget word size.
// NOTE(review): the first signature line and the StringSwitch head line were
// lost in extraction; the lines below are kept verbatim.
16591  const MachineFunction &MF) const {
16592  bool isPPC64 = Subtarget.isPPC64();
16593 
16594  bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16595  if (!is64Bit && VT != LLT::scalar(32))
16596  report_fatal_error("Invalid register global variable type");
16597 
16599  .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16600  .Case("r2", isPPC64 ? Register() : PPC::R2)
16601  .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16602  .Default(Register());
16603 
16604  if (Reg)
16605  return Reg;
16606  report_fatal_error("Invalid register name global variable");
16607 }
16608 
// Return true when the given address node is reached indirectly through the
// GOT/TOC rather than materialized directly.
// NOTE(review): the signature line and the CodeModel query line were lost in
// extraction; the lines below are kept verbatim.
16610  // 32-bit SVR4 ABI access everything as got-indirect.
16611  if (Subtarget.is32BitELFABI())
16612  return true;
16613 
16614  // AIX accesses everything indirectly through the TOC, which is similar to
16615  // the GOT.
16616  if (Subtarget.isAIXABI())
16617  return true;
16618 
16620  // If it is small or large code model, module locals are accessed
16621  // indirectly by loading their address from .toc/.got.
16622  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16623  return true;
16624 
16625  // JumpTable and BlockAddress are accessed as got-indirect.
16626  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16627  return true;
16628 
16629  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16630  return Subtarget.isGVIndirectSymbol(G->getGlobal());
16631 
16632  return false;
16633 }
16634 
16635 bool
// Disallow folding constant offsets into global addresses.
// NOTE(review): the line carrying the function name was lost in extraction.
16637  // The PowerPC target isn't yet aware of offsets.
16638  return false;
16639 }
16640 
// Describe the memory behavior (type, pointer operand, offset, size,
// alignment) of PPC memory intrinsics so the optimizer can reason about them.
// Returns true when Info was populated.
// NOTE(review): the first signature line and several Info.opc/Info.flags
// assignment lines were lost in extraction; all remaining lines are verbatim.
16642  const CallInst &I,
16643  MachineFunction &MF,
16644  unsigned Intrinsic) const {
16645  switch (Intrinsic) {
// 128-bit lock-free atomics: 16-byte aligned i128 access through operand 0
// (operand 2 for the store form).
16646  case Intrinsic::ppc_atomicrmw_xchg_i128:
16647  case Intrinsic::ppc_atomicrmw_add_i128:
16648  case Intrinsic::ppc_atomicrmw_sub_i128:
16649  case Intrinsic::ppc_atomicrmw_nand_i128:
16650  case Intrinsic::ppc_atomicrmw_and_i128:
16651  case Intrinsic::ppc_atomicrmw_or_i128:
16652  case Intrinsic::ppc_atomicrmw_xor_i128:
16653  case Intrinsic::ppc_cmpxchg_i128:
16655  Info.memVT = MVT::i128;
16656  Info.ptrVal = I.getArgOperand(0);
16657  Info.offset = 0;
16658  Info.align = Align(16);
16661  return true;
16662  case Intrinsic::ppc_atomic_load_i128:
16664  Info.memVT = MVT::i128;
16665  Info.ptrVal = I.getArgOperand(0);
16666  Info.offset = 0;
16667  Info.align = Align(16);
16669  return true;
16670  case Intrinsic::ppc_atomic_store_i128:
16671  Info.opc = ISD::INTRINSIC_VOID;
16672  Info.memVT = MVT::i128;
16673  Info.ptrVal = I.getArgOperand(2);
16674  Info.offset = 0;
16675  Info.align = Align(16);
16677  return true;
// Altivec/VSX loads: the hardware ignores the low address bits, so the
// conservative accessed range below spans one element either side.
16678  case Intrinsic::ppc_altivec_lvx:
16679  case Intrinsic::ppc_altivec_lvxl:
16680  case Intrinsic::ppc_altivec_lvebx:
16681  case Intrinsic::ppc_altivec_lvehx:
16682  case Intrinsic::ppc_altivec_lvewx:
16683  case Intrinsic::ppc_vsx_lxvd2x:
16684  case Intrinsic::ppc_vsx_lxvw4x:
16685  case Intrinsic::ppc_vsx_lxvd2x_be:
16686  case Intrinsic::ppc_vsx_lxvw4x_be:
16687  case Intrinsic::ppc_vsx_lxvl:
16688  case Intrinsic::ppc_vsx_lxvll: {
16689  EVT VT;
16690  switch (Intrinsic) {
16691  case Intrinsic::ppc_altivec_lvebx:
16692  VT = MVT::i8;
16693  break;
16694  case Intrinsic::ppc_altivec_lvehx:
16695  VT = MVT::i16;
16696  break;
16697  case Intrinsic::ppc_altivec_lvewx:
16698  VT = MVT::i32;
16699  break;
16700  case Intrinsic::ppc_vsx_lxvd2x:
16701  case Intrinsic::ppc_vsx_lxvd2x_be:
16702  VT = MVT::v2f64;
16703  break;
16704  default:
16705  VT = MVT::v4i32;
16706  break;
16707  }
16708 
16710  Info.memVT = VT;
16711  Info.ptrVal = I.getArgOperand(0);
// Conservative window: the access may start up to StoreSize-1 bytes before
// the pointer and cover 2*StoreSize-1 bytes in total.
16712  Info.offset = -VT.getStoreSize()+1;
16713  Info.size = 2*VT.getStoreSize()-1;
16714  Info.align = Align(1);
16716  return true;
16717  }
// Altivec/VSX stores: same conservative range, pointer is operand 1.
16718  case Intrinsic::ppc_altivec_stvx:
16719  case Intrinsic::ppc_altivec_stvxl:
16720  case Intrinsic::ppc_altivec_stvebx:
16721  case Intrinsic::ppc_altivec_stvehx:
16722  case Intrinsic::ppc_altivec_stvewx:
16723  case Intrinsic::ppc_vsx_stxvd2x:
16724  case Intrinsic::ppc_vsx_stxvw4x:
16725  case Intrinsic::ppc_vsx_stxvd2x_be:
16726  case Intrinsic::ppc_vsx_stxvw4x_be:
16727  case Intrinsic::ppc_vsx_stxvl:
16728  case Intrinsic::ppc_vsx_stxvll: {
16729  EVT VT;
16730  switch (Intrinsic) {
16731  case Intrinsic::ppc_altivec_stvebx:
16732  VT = MVT::i8;
16733  break;
16734  case Intrinsic::ppc_altivec_stvehx:
16735  VT = MVT::i16;
16736  break;
16737  case Intrinsic::ppc_altivec_stvewx:
16738  VT = MVT::i32;
16739  break;
16740  case Intrinsic::ppc_vsx_stxvd2x:
16741  case Intrinsic::ppc_vsx_stxvd2x_be:
16742  VT = MVT::v2f64;
16743  break;
16744  default:
16745  VT = MVT::v4i32;
16746  break;
16747  }
16748 
16749  Info.opc = ISD::INTRINSIC_VOID;
16750  Info.memVT = VT;
16751  Info.ptrVal = I.getArgOperand(1);
16752  Info.offset = -VT.getStoreSize()+1;
16753  Info.size = 2*VT.getStoreSize()-1;
16754  Info.align = Align(1);
16756  return true;
16757  }
// Store-conditional family: natural alignment for the element width.
16758  case Intrinsic::ppc_stdcx:
16759  case Intrinsic::ppc_stwcx:
16760  case Intrinsic::ppc_sthcx:
16761  case Intrinsic::ppc_stbcx: {
16762  EVT VT;
16763  auto Alignment = Align(8);
16764  switch (Intrinsic) {
16765  case Intrinsic::ppc_stdcx:
16766  VT = MVT::i64;
16767  break;
16768  case Intrinsic::ppc_stwcx:
16769  VT = MVT::i32;
16770  Alignment = Align(4);
16771  break;
16772  case Intrinsic::ppc_sthcx:
16773  VT = MVT::i16;
16774  Alignment = Align(2);
16775  break;
16776  case Intrinsic::ppc_stbcx:
16777  VT = MVT::i8;
16778  Alignment = Align(1);
16779  break;
16780  }
16782  Info.memVT = VT;
16783  Info.ptrVal = I.getArgOperand(0);
16784  Info.offset = 0;
16785  Info.align = Alignment;
16787  return true;
16788  }
16789  default:
16790  break;
16791  }
16792 
16793  return false;
16794 }
16795 
16796 /// It returns EVT::Other if the type should be determined using generic
16797 /// target-independent logic.
// Pick the widest profitable type for lowering memcpy/memset-like ops:
// v4i32 when Altivec/VSX can be used efficiently, otherwise the GPR width.
// NOTE(review): the first signature line was lost in extraction.
16799  const MemOp &Op, const AttributeList &FuncAttributes) const {
16800  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16801  // We should use Altivec/VSX loads and stores when available. For unaligned
16802  // addresses, unaligned VSX loads are only fast starting with the P8.
16803  if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16804  (Op.isAligned(Align(16)) ||
16805  ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16806  return MVT::v4i32;
16807  }
16808 
16809  if (Subtarget.isPPC64()) {
16810  return MVT::i64;
16811  }
16812 
16813  return MVT::i32;
16814 }
16815 
16816 /// Returns true if it is beneficial to convert a load of a constant
16817 /// to just the constant itself.
// Materializing any integer constant up to 64 bits is preferable to loading
// it from memory on PPC.
// NOTE(review): the first signature line was lost in extraction.
16819  Type *Ty) const {
16820  assert(Ty->isIntegerTy());
16821 
16822  unsigned BitSize = Ty->getPrimitiveSizeInBits();
16823  return !(BitSize == 0 || BitSize > 64);
16824 }
16825 
// Truncation from i64 to i32 is free (it is just a subregister access).
// NOTE(review): the signature line was lost in extraction.
16827  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16828  return false;
16829  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16830  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16831  return NumBits1 == 64 && NumBits2 == 32;
16832 }
16833 
// EVT variant of the above: only i64 -> i32 truncation is free.
// NOTE(review): the signature line was lost in extraction.
16835  if (!VT1.isInteger() || !VT2.isInteger())
16836  return false;
16837  unsigned NumBits1 = VT1.getSizeInBits();
16838  unsigned NumBits2 = VT2.getSizeInBits();
16839  return NumBits1 == 64 && NumBits2 == 32;
16840 }
16841 
// NOTE(review): the signature line was lost in extraction.
16843  // Generally speaking, zexts are not free, but they are free when they can be
16844  // folded with other operations.
16845  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16846  EVT MemVT = LD->getMemoryVT();
// Narrow loads (and i32 loads on 64-bit targets) zero-extend naturally, so
// a following zext costs nothing as long as the load isn't sign-extending.
16847  if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16848  (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16849  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16850  LD->getExtensionType() == ISD::ZEXTLOAD))
16851  return true;
16852  }
16853 
16854  // FIXME: Add other cases...
16855  // - 32-bit shifts with a zext to i64
16856  // - zext after ctlz, bswap, etc.
16857  // - zext after and by a constant mask
16858 
16859  return TargetLowering::isZExtFree(Val, VT2);
16860 }
16861 
16862 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16863  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16864  "invalid fpext types");
16865  // Extending to float128 is not free.
16866  if (DestVT == MVT::f128)
16867  return false;
16868  return true;
16869 }
16870 
// Compare immediates are legal when they fit a 16-bit signed (cmpwi/cmpdi)
// or unsigned (cmplwi/cmpldi) field.
// NOTE(review): the signature line was lost in extraction.
16872  return isInt<16>(Imm) || isUInt<16>(Imm);
16873 }
16874 
// Add immediates are legal when they fit a 16-bit signed or unsigned field.
// NOTE(review): the signature line was lost in extraction.
16876  return isInt<16>(Imm) || isUInt<16>(Imm);
16877 }
16878 
// Report whether an unaligned access of type VT is allowed, optionally
// setting *Fast. NOTE(review): the first signature line(s) were lost in
// extraction; the parameter line below continues them verbatim.
16881  unsigned *Fast) const {
16882  if (DisablePPCUnaligned)
16883  return false;
16884 
16885  // PowerPC supports unaligned memory access for simple non-vector types.
16886  // Although accessing unaligned addresses is not as efficient as accessing
16887  // aligned addresses, it is generally more efficient than manual expansion,
16888  // and generally only traps for software emulation when crossing page
16889  // boundaries.
16890 
16891  if (!VT.isSimple())
16892  return false;
16893 
16894  if (VT.isFloatingPoint() && !VT.isVector() &&
16895  !Subtarget.allowsUnalignedFPAccess())
16896  return false;
16897 
// Vector types are allowed unaligned only with VSX and only for the four
// natively-supported 128-bit element layouts.
16898  if (VT.getSimpleVT().isVector()) {
16899  if (Subtarget.hasVSX()) {
16900  if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16901  VT != MVT::v4f32 && VT != MVT::v4i32)
16902  return false;
16903  } else {
16904  return false;
16905  }
16906  }
16907 
16908  if (VT == MVT::ppcf128)
16909  return false;
16910 
16911  if (Fast)
16912  *Fast = 1;
16913 
16914  return true;
16915 }
16916 
// Decide whether a multiply by constant C should be decomposed into
// shifts/adds. NOTE(review): the first signature line was lost in
// extraction; the parameter line below continues it verbatim.
16918  SDValue C) const {
16919  // Check integral scalar types.
16920  if (!VT.isScalarInteger())
16921  return false;
16922  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16923  if (!ConstNode->getAPIntValue().isSignedIntN(64))
16924  return false;
16925  // This transformation will generate >= 2 operations. But the following
16926  // cases will generate <= 2 instructions during ISEL. So exclude them.
16927  // 1. If the constant multiplier fits 16 bits, it can be handled by one
16928  // HW instruction, ie. MULLI
16929  // 2. If the multiplier after shifted fits 16 bits, an extra shift
16930  // instruction is needed than case 1, ie. MULLI and RLDICR
16931  int64_t Imm = ConstNode->getSExtValue();
16932  unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
16933  Imm >>= Shift;
16934  if (isInt<16>(Imm))
16935  return false;
16936  uint64_t UImm = static_cast<uint64_t>(Imm);
// Decompose only when the multiplier is one off a power of two (positive or
// negative), i.e. expressible as (1<<k) +/- 1 or its negation.
16937  if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16938  isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16939  return true;
16940  }
16941  return false;
16942 }
16943 
// MachineFunction/EVT overload; forwards to the Function/Type* overload.
// NOTE(review): the signature line and the head of the forwarded call were
// lost in extraction; the lines below are kept verbatim.
16945  EVT VT) const {
16947  MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16948 }
16949 
// FMA is faster than separate mul+add for f32/f64 (except on SPE), and for
// f128 only when the subtarget has Power9 vector support.
// NOTE(review): the first signature line was lost in extraction.
16951  Type *Ty) const {
16952  if (Subtarget.hasSPE())
16953  return false;
16954  switch (Ty->getScalarType()->getTypeID()) {
16955  case Type::FloatTyID:
16956  case Type::DoubleTyID:
16957  return true;
16958  case Type::FP128TyID:
16959  return Subtarget.hasP9Vector();
16960  default:
16961  return false;
16962  }
16963 }
16964 
16965 // FIXME: add more patterns which are not profitable to hoist.
// Return false (don't hoist) for single-use instructions whose in-place form
// enables a better PPC lowering: FMul feeding an FAdd/FSub (FMA formation)
// and float loads feeding stores (load/store type combining).
// NOTE(review): the signature line was lost in extraction.
16967  if (!I->hasOneUse())
16968  return true;
16969 
16970  Instruction *User = I->user_back();
16971  assert(User && "A single use instruction with no uses.");
16972 
16973  switch (I->getOpcode()) {
16974  case Instruction::FMul: {
16975  // Don't break FMA, PowerPC prefers FMA.
16976  if (User->getOpcode() != Instruction::FSub &&
16977  User->getOpcode() != Instruction::FAdd)
16978  return true;
16979 
// NOTE(review): the line binding 'Options' (target options lookup) and one
// conjunct of the return expression were lost in extraction — confirm
// against upstream.
16981  const Function *F = I->getFunction();
16982  const DataLayout &DL = F->getParent()->getDataLayout();
16983  Type *Ty = User->getOperand(0)->getType();
16984 
16985  return !(
16986  isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16988  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16989  }
16990  case Instruction::Load: {
16991  // Don't break "store (load float*)" pattern, this pattern will be combined
16992  // to "store (load int32)" in later InstCombine pass. See function
16993  // combineLoadToOperationType. On PowerPC, loading a float point takes more
16994  // cycles than loading a 32 bit integer.
16995  LoadInst *LI = cast<LoadInst>(I);
16996  // For the loads that combineLoadToOperationType does nothing, like
16997  // ordered load, it should be profitable to hoist them.
16998  // For swifterror load, it can only be used for pointer to pointer type, so
16999  // later type check should get rid of this case.
17000  if (!LI->isUnordered())
17001  return true;
17002 
17003  if (User->getOpcode() != Instruction::Store)
17004  return true;
17005 
17006  if (I->getType()->getTypeID() != Type::FloatTyID)
17007  return true;
17008 
17009  return false;
17010  }
17011  default:
17012  return true;
17013  }
17014  return true;
17015 }
17016 
17017 const MCPhysReg *
// Scratch registers clobbered at call sites, used as implicit-defs for
// stackmaps/patchpoints. NOTE(review): the line carrying the function name
// was lost in extraction.
17019  // LR is a callee-save register, but we must treat it as clobbered by any call
17020  // site. Hence we include LR in the scratch registers, which are in turn added
17021  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17022  // to CTR, which is used by any indirect call.
17023  static const MCPhysReg ScratchRegs[] = {
17024  PPC::X12, PPC::LR8, PPC::CTR8, 0
17025  };
17026 
17027  return ScratchRegs;
17028 }
17029 
// EH: the exception pointer is passed in r3 (x3 on 64-bit).
// NOTE(review): the first signature line was lost in extraction.
17031  const Constant *PersonalityFn) const {
17032  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17033 }
17034 
// EH: the exception selector is passed in r4 (x4 on 64-bit).
// NOTE(review): the first signature line was lost in extraction.
17036  const Constant *PersonalityFn) const {
17037  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17038 }
17039 
17040 bool
// With VSX (or direct moves for v2i64), building vectors via shuffles avoids
// going through the stack. NOTE(review): the line carrying the function name
// was lost in extraction.
17042  EVT VT , unsigned DefinedValues) const {
17043  if (VT == MVT::v2i64)
17044  return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17045 
17046  if (Subtarget.hasVSX())
17047  return true;
17048 
17049  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17050 }
17051 
// Prefer ILP scheduling unless disabled or the MachineScheduler is in use.
// NOTE(review): the signature line and the fallback return (base-class
// preference) were lost in extraction — confirm against upstream.
17053  if (DisableILPPref || Subtarget.enableMachineScheduler())
17055 
17056  return Sched::ILP;
17057 }
17058 
17059 // Create a fast isel object.
17060 FastISel *
// Create the PPC fast-isel instance. NOTE(review): the line carrying the
// function name and first parameter was lost in extraction.
17062  const TargetLibraryInfo *LibInfo) const {
17063  return PPC::createFastISel(FuncInfo, LibInfo);
17064 }
17065 
17066 // 'Inverted' means the FMA opcode after negating one multiplicand.
17067 // For example, (fma -a b c) = (fnmsub a b c)
17068 static unsigned invertFMAOpcode(unsigned Opc) {
17069  switch (Opc) {
17070  default:
17071  llvm_unreachable("Invalid FMA opcode for PowerPC!");
17072  case ISD::FMA:
17073  return PPCISD::FNMSUB;
17074  case PPCISD::FNMSUB:
17075  return ISD::FMA;
17076  }
17077 }
17078 
// Produce a cheaper negated form of Op when possible. The PPC-specific case
// rewrites (fneg (fnmsub ...)) into another fnmsub or an fma; anything else
// defers to the base class. NOTE(review): the first signature line, the
// recursion-depth guard condition, and several NegatibleCost declarations
// were lost in extraction; remaining lines are verbatim.
17080  bool LegalOps, bool OptForSize,
17081  NegatibleCost &Cost,
17082  unsigned Depth) const {
17084  return SDValue();
17085 
17086  unsigned Opc = Op.getOpcode();
17087  EVT VT = Op.getValueType();
17088  SDNodeFlags Flags = Op.getNode()->getFlags();
17089 
17090  switch (Opc) {
17091  case PPCISD::FNMSUB:
17092  if (!Op.hasOneUse() || !isTypeLegal(VT))
17093  break;
17094 
17096  SDValue N0 = Op.getOperand(0);
17097  SDValue N1 = Op.getOperand(1);
17098  SDValue N2 = Op.getOperand(2);
17099  SDLoc Loc(Op);
17100 
17102  SDValue NegN2 =
17103  getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17104 
// The addend must be negatable for either rewrite below.
17105  if (!NegN2)
17106  return SDValue();
17107 
17108  // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17109  // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17110  // These transformations may change sign of zeroes. For example,
17111  // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17112  if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17113  // Try and choose the cheaper one to negate.
17115  SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17116  N0Cost, Depth + 1);
17117 
17119  SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17120  N1Cost, Depth + 1);
17121 
17122  if (NegN0 && N0Cost <= N1Cost) {
17123  Cost = std::min(N0Cost, N2Cost);
17124  return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17125  } else if (NegN1) {
17126  Cost = std::min(N1Cost, N2Cost);
17127  return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17128  }
17129  }
17130 
17131  // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17132  if (isOperationLegal(ISD::FMA, VT)) {
17133  Cost = N2Cost;
17134  return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17135  }
17136 
17137  break;
17138  }
17139 
17140  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17141  Cost, Depth);
17142 }
17143 
17144 // Override to enable LOAD_STACK_GUARD lowering on Linux.
// Enable LOAD_STACK_GUARD lowering on Linux targets.
// NOTE(review): the signature line and the non-Linux fallback return were
// lost in extraction — confirm against upstream.
17146  if (!Subtarget.isTargetLinux())
17148  return true;
17149 }
17150 
17151 // Override to disable global variable loading on Linux and insert AIX canary
17152 // word declaration.
// On AIX, declare the SSP canary word global; on non-Linux ELF, defer to the
// base class. NOTE(review): the signature line and the deferred base-class
// call were lost in extraction — confirm against upstream.
17154  if (Subtarget.isAIXABI()) {
17155  M.getOrInsertGlobal(AIXSSPCanaryWordName,
17156  Type::getInt8PtrTy(M.getContext()));
17157  return;
17158  }
17159  if (!Subtarget.isTargetLinux())
17161 }
17162 
// On AIX the stack guard is the canary word global declared above.
// NOTE(review): the signature line and the non-AIX fallback return were lost
// in extraction — confirm against upstream.
17164  if (Subtarget.isAIXABI())
17165  return M.getGlobalVariable(AIXSSPCanaryWordName);
17167 }
17168 
// Return true when the FP immediate can be materialized cheaply (without a
// constant-pool load). NOTE(review): the first signature line was lost in
// extraction; the parameter line below continues it verbatim.
17170  bool ForCodeSize) const {
17171  if (!VT.isSimple() || !Subtarget.hasVSX())
17172  return false;
17173 
17174  switch(VT.getSimpleVT().SimpleTy) {
17175  default:
17176  // For FP types that are currently not supported by PPC backend, return
17177  // false. Examples: f16, f80.
17178  return false;
17179  case MVT::f32:
17180  case MVT::f64: {
17181  if (Subtarget.hasPrefixInstrs()) {
17182  // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
17183  return true;
17184  }
17185  bool IsExact;
17186  APSInt IntResult(16, false);
17187  // The rounding mode doesn't really matter because we only care about floats
17188  // that can be converted to integers exactly.
17189  Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17190  // For exact values in the range [-16, 15] we can materialize the float.
17191  if (IsExact && IntResult <= 15 && IntResult >= -16)
17192  return true;
17193  return Imm.isZero();
17194  }
17195  case MVT::ppcf128:
17196  return Imm.isPosZero();
17197  }
17198 }
17199 
17200 // For vector shift operation op, fold
17201 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17203  SelectionDAG &DAG) {
17204  SDValue N0 = N->getOperand(0);
17205  SDValue N1 = N->getOperand(1);
17206  EVT VT = N0.getValueType();
17207  unsigned OpSizeInBits = VT.getScalarSizeInBits();
17208  unsigned Opcode = N->getOpcode();
17209  unsigned TargetOpcode;
17210 
17211  switch (Opcode) {
17212  default:
17213  llvm_unreachable("Unexpected shift operation");
17214  case ISD::SHL:
17215  TargetOpcode = PPCISD::SHL;
17216  break;
17217  case ISD::SRL:
17218  TargetOpcode = PPCISD::SRL;
17219  break;
17220  case ISD::SRA:
17221  TargetOpcode = PPCISD::SRA;
17222  break;
17223  }
17224 
17225  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17226  N1->getOpcode() == ISD::AND)
17228  if (Mask->getZExtValue() == OpSizeInBits - 1)
17229  return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17230 
17231  return SDValue();
17232 }
17233 
17234 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17235  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17236  return Value;
17237 
17238  SDValue N0 = N->getOperand(0);
17239  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17240  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17241  N0.getOpcode() != ISD::SIGN_EXTEND ||
17242  N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17243  N->getValueType(0) != MVT::i64)
17244  return SDValue();
17245 
17246  // We can't save an operation here if the value is already extended, and
17247  // the existing shift is easier to combine.
17248  SDValue ExtsSrc = N0.getOperand(0);
17249  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17250  ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17251  return SDValue();
17252 
17253  SDLoc DL(N0);
17254  SDValue ShiftBy = SDValue(CN1, 0);
17255  // We want the shift amount to be i32 on the extswli, but the shift could
17256  // have an i64.
17257  if (ShiftBy.getValueType() == MVT::i64)
17258  ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17259 
17260  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17261  ShiftBy);
17262 }
17263 
17264 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17265  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17266  return Value;
17267 
17268  return SDValue();
17269 }
17270 
17271 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17272  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17273  return Value;
17274 
17275  return SDValue();
17276 }
17277 
17278 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17279 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17280 // When C is zero, the equation (addi Z, -C) can be simplified to Z
17281 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17283  const PPCSubtarget &Subtarget) {
17284  if (!Subtarget.isPPC64())
17285  return SDValue();
17286 
17287  SDValue LHS = N->getOperand(0);
17288  SDValue RHS = N->getOperand(1);
17289 
17290  auto isZextOfCompareWithConstant = [](SDValue Op) {
17291  if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17292  Op.getValueType() != MVT::i64)
17293  return false;
17294 
17295  SDValue Cmp = Op.getOperand(0);
17296  if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17297  Cmp.getOperand(0).getValueType() != MVT::i64)
17298  return false;
17299 
17300  if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17301  int64_t NegConstant = 0 - Constant->getSExtValue();
17302  // Due to the limitations of the addi instruction,
17303  // -C is required to be [-32768, 32767].
17304  return isInt<16>(NegConstant);
17305  }
17306 
17307  return false;
17308  };
17309 
17310  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17311  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17312 
17313  // If there is a pattern, canonicalize a zext operand to the RHS.
17314  if (LHSHasPattern && !RHSHasPattern)
17315  std::swap(LHS, RHS);
17316  else if (!LHSHasPattern && !RHSHasPattern)
17317  return SDValue();
17318 
17319  SDLoc DL(N);
17320  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17321  SDValue Cmp = RHS.getOperand(0);
17322  SDValue Z = Cmp.getOperand(0);
17323  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17324  int64_t NegConstant = 0 - Constant->getSExtValue();
17325 
17326  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17327  default: break;
17328  case ISD::SETNE: {
17329  // when C == 0
17330  // --> addze X, (addic Z, -1).carry
17331  // /
17332  // add X, (zext(setne Z, C))--
17333  // \ when -32768 <= -C <= 32767 && C != 0
17334  // --> addze X, (addic (addi Z, -C), -1).carry
17335  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17336  DAG.getConstant(NegConstant, DL, MVT::i64));
17337  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17338  SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17339  AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17340  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17341  SDValue(Addc.getNode(), 1));
17342  }
17343  case ISD::SETEQ: {
17344  // when C == 0
17345  // --> addze X, (subfic Z, 0).carry
17346  // /
17347  // add X, (zext(sete Z, C))--
17348  // \ when -32768 <= -C <= 32767 && C != 0
17349  // --> addze X, (subfic (addi Z, -C), 0).carry
17350  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17351  DAG.getConstant(NegConstant, DL, MVT::i64));
17352  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17353  SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17354  DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17355  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17356  SDValue(Subc.getNode(), 1));
17357  }
17358  }
17359 
17360  return SDValue();
17361 }
17362 
17363 // Transform
17364 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17365 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17366 // In this case both C1 and C2 must be known constants.
17367 // C1+C2 must fit into a 34 bit signed integer.
17369  const PPCSubtarget &Subtarget) {
17370  if (!Subtarget.isUsingPCRelativeCalls())
17371  return SDValue();
17372 
17373  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17374  // If we find that node try to cast the Global Address and the Constant.
17375  SDValue LHS = N->getOperand(0);
17376  SDValue RHS = N->getOperand(1);
17377 
17378  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17379  std::swap(LHS, RHS);
17380 
17381  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17382  return SDValue();
17383 
17384  // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17385  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17386  ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17387 
17388  // Check that both casts succeeded.
17389  if (!GSDN || !ConstNode)
17390  return SDValue();
17391 
17392  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17393  SDLoc DL(GSDN);
17394 
17395  // The signed int offset needs to fit in 34 bits.
17396  if (!isInt<34>(NewOffset))
17397  return SDValue();
17398 
17399  // The new global address is a copy of the old global address except
17400  // that it has the updated Offset.
17401  SDValue GA =
17402  DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17403  NewOffset, GSDN->getTargetFlags());
17404  SDValue MatPCRel =
17405  DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17406  return MatPCRel;
17407 }
17408 
17409 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17410  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17411  return Value;
17412 
17413  if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17414  return Value;
17415 
17416  return SDValue();
17417 }
17418 
17419 // Detect TRUNCATE operations on bitcasts of float128 values.
17420 // What we are looking for here is the situtation where we extract a subset
17421 // of bits from a 128 bit float.
17422 // This can be of two forms:
17423 // 1) BITCAST of f128 feeding TRUNCATE
17424 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17425 // The reason this is required is because we do not have a legal i128 type
17426 // and so we want to prevent having to store the f128 and then reload part
17427 // of it.
17428 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17429  DAGCombinerInfo &DCI) const {
17430  // If we are using CRBits then try that first.
17431  if (Subtarget.useCRBits()) {
17432  // Check if CRBits did anything and return that if it did.
17433  if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17434  return CRTruncValue;
17435  }
17436 
17437  SDLoc dl(N);
17438  SDValue Op0 = N->getOperand(0);
17439 
17440  // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
17441  if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
17442  EVT VT = N->getValueType(0);
17443  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17444  return SDValue();
17445  SDValue Sub = Op0.getOperand(0);
17446  if (Sub.getOpcode() == ISD::SUB) {
17447  SDValue SubOp0 = Sub.getOperand(0);
17448  SDValue SubOp1 = Sub.getOperand(1);
17449  if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
17450  (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
17451  return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
17452  SubOp1.getOperand(0),
17453  DCI.DAG.getTargetConstant(0, dl, MVT::i32));
17454  }
17455  }
17456  }
17457 
17458  // Looking for a truncate of i128 to i64.
17459  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17460  return SDValue();
17461 
17462  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17463 
17464  // SRL feeding TRUNCATE.
17465  if (Op0.getOpcode() == ISD::SRL) {
17466  ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17467  // The right shift has to be by 64 bits.
17468  if (!ConstNode || ConstNode->getZExtValue() != 64)
17469  return SDValue();
17470 
17471  // Switch the element number to extract.
17472  EltToExtract = EltToExtract ? 0 : 1;
17473  // Update Op0 past the SRL.
17474  Op0 = Op0.getOperand(0);
17475  }
17476 
17477  // BITCAST feeding a TRUNCATE possibly via SRL.
17478  if (Op0.getOpcode() == ISD::BITCAST &&
17479  Op0.getValueType() == MVT::i128 &&
17480  Op0.getOperand(0).getValueType() == MVT::f128) {
17481  SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17482  return DCI.DAG.getNode(
17484  DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17485  }
17486  return SDValue();
17487 }
17488 
17489 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17490  SelectionDAG &DAG = DCI.DAG;
17491 
17492  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17493  if (!ConstOpOrElement)
17494  return SDValue();
17495 
17496  // An imul is usually smaller than the alternative sequence for legal type.
17497  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17498  isOperationLegal(ISD::MUL, N->getValueType(0)))
17499  return SDValue();
17500 
17501  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17502  switch (this->Subtarget.getCPUDirective()) {
17503  default:
17504  // TODO: enhance the condition for subtarget before pwr8
17505  return false;
17506  case PPC::DIR_PWR8:
17507  // type mul add shl
17508  // scalar 4 1 1
17509  // vector 7 2 2
17510  return true;
17511  case PPC::DIR_PWR9:
17512  case PPC::DIR_PWR10:
17513  case PPC::DIR_PWR_FUTURE:
17514  // type mul add shl
17515  // scalar 5 2 2
17516  // vector 7 2 2
17517 
17518  // The cycle RATIO of related operations are showed as a table above.
17519  // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
17520  // scalar and vector type. For 2 instrs patterns, add/sub + shl
17521  // are 4, it is always profitable; but for 3 instrs patterns
17522  // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
17523  // So we should only do it for vector type.
17524  return IsAddOne && IsNeg ? VT.isVector() : true;
17525  }
17526  };
17527 
17528  EVT VT = N->getValueType(0);
17529  SDLoc DL(N);
17530 
17531  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17532  bool IsNeg = MulAmt.isNegative();
17533  APInt MulAmtAbs = MulAmt.abs();
17534 
17535  if ((MulAmtAbs - 1).isPowerOf2()) {
17536  // (mul x, 2^N + 1) => (add (shl x, N), x)
17537  // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17538 
17539  if (!IsProfitable(IsNeg, true, VT))
17540  return SDValue();
17541 
17542  SDValue Op0 = N->getOperand(0);
17543  SDValue Op1 =
17544  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17545  DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17546  SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17547 
17548  if (!IsNeg)
17549  return Res;
17550 
17551  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17552  } else if ((MulAmtAbs + 1).isPowerOf2()) {
17553  // (mul x, 2^N - 1) => (sub (shl x, N), x)
17554  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17555 
17556  if (!IsProfitable(IsNeg, false, VT))
17557  return SDValue();
17558 
17559  SDValue Op0 = N->getOperand(0);
17560  SDValue Op1 =
17561  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17562  DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17563 
17564  if (!IsNeg)
17565  return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
17566  else
17567  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
17568 
17569  } else {
17570  return SDValue();
17571  }
17572 }
17573 
17574 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
17575 // in combiner since we need to check SD flags and other subtarget features.
17576 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
17577  DAGCombinerInfo &DCI) const {
17578  SDValue N0 = N->getOperand(0);
17579  SDValue N1 = N->getOperand(1);
17580  SDValue N2 = N->getOperand(2);
17581  SDNodeFlags Flags = N->getFlags();
17582  EVT VT = N->getValueType(0);
17583  SelectionDAG &DAG = DCI.DAG;
17585  unsigned Opc = N->getOpcode();
17586  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
17587  bool LegalOps = !DCI.isBeforeLegalizeOps();
17588  SDLoc Loc(N);
17589 
17590  if (!isOperationLegal(ISD::FMA, VT))
17591  return SDValue();
17592 
17593  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
17594  // since (fnmsub a b c)=-0 while c-ab=+0.
17595  if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
17596  return SDValue();
17597 
17598  // (fma (fneg a) b c) => (fnmsub a b c)
17599  // (fnmsub (fneg a) b c) => (fma a b c)
17600  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
17601  return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
17602 
17603  // (fma a (fneg b) c) => (fnmsub a b c)
17604  // (fnmsub a (fneg b) c) => (fma a b c)
17605  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
17606  return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
17607 
17608  return SDValue();
17609 }
17610 
17611 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17612  // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
17613  if (!Subtarget.is64BitELFABI())
17614  return false;
17615 
17616  // If not a tail call then no need to proceed.
17617  if (!CI->isTailCall())
17618  return false;
17619 
17620  // If sibling calls have been disabled and tail-calls aren't guaranteed
17621  // there is no reason to duplicate.
17622  auto &TM = getTargetMachine();
17623  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17624  return false;
17625 
17626  // Can't tail call a function called indirectly, or if it has variadic args.
17627  const Function *Callee = CI->getCalledFunction();
17628  if (!Callee || Callee->isVarArg())
17629  return false;
17630 
17631  // Make sure the callee and caller calling conventions are eligible for tco.
17632  const Function *Caller = CI->getParent()->getParent();
17633  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17634  CI->getCallingConv()))
17635  return false;
17636 
17637  // If the function is local then we have a good chance at tail-calling it
17638  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17639 }
17640 
17641 bool PPCTargetLowering::
17642 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17643  const Value *Mask = AndI.getOperand(1);
17644  // If the mask is suitable for andi. or andis. we should sink the and.
17645  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17646  // Can't handle constants wider than 64-bits.
17647  if (CI->getBitWidth() > 64)
17648  return false;
17649  int64_t ConstVal = CI->getZExtValue();
17650  return isUInt<16>(ConstVal) ||
17651  (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17652  }
17653 
17654  // For non-constant masks, we can always use the record-form and.
17655  return true;
17656 }
17657 
17658 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
17659 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
17660 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
17661 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
17662 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  // VABSD is only defined for these three integer vector types.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, if it's known to be positive (as signed
    // integer) due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      // Both sub inputs are zero-extended: emit VABSD with flag operand 0.
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw
    // (the flag operand 1 selects that lowering; hasOneUse avoids
    // duplicating the sub for other users).
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}
17700 
17701 // For type v4i32/v8ii16/v16i8, transform
17702 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
17703 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
17704 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
17705 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  // Both selected values must be subtractions controlled by a SETCC.
  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // ABSD only available for type v4i32/v8i16/v16i8
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // At least to save one more dependent computation
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Can only handle unsigned comparison here
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    // "Less than" forms are the mirror image; swapping the two subs lets
    // the single pattern check below handle all four condition codes.
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    // This selects |a - b| on unsigned operands: emit VABSD directly.
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));
  }

  return SDValue();
}
17763 
17764 /// getAddrModeForFlags - Based on the set of address flags, select the most
17765 /// optimal instruction format to match by.
17766 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17767  // This is not a node we should be handling here.
17768  if (Flags == PPC::MOF_None)
17769  return PPC::AM_None;
17770  // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
17771  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17772  if ((Flags & FlagSet) == FlagSet)
17773  return PPC::AM_DForm;
17774  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17775  if ((Flags & FlagSet) == FlagSet)
17776  return PPC::AM_DSForm;
17777  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
17778  if ((Flags & FlagSet) == FlagSet)
17779  return PPC::AM_DQForm;
17780  for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
17781  if ((Flags & FlagSet) == FlagSet)
17782  return PPC::AM_PrefixDForm;
17783  // If no other forms are selected, return an X-Form as it is the most
17784  // general addressing mode.
17785  return PPC::AM_XForm;
17786 }
17787 
17788 /// Set alignment flags based on whether or not the Frame Index is aligned.
17789 /// Utilized when computing flags for address computation when selecting
17790 /// load and store instructions.
17791 static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
17792  SelectionDAG &DAG) {
17793  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
17794  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
17795  if (!FI)
17796  return;
17797  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17798  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
17799  // If this is (add $FI, $S16Imm), the alignment flags are already set
17800  // based on the immediate. We just need to clear the alignment flags
17801  // if the FI alignment is weaker.
17802  if ((FrameIndexAlign % 4) != 0)
17803  FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
17804  if ((FrameIndexAlign % 16) != 0)
17805  FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
17806  // If the address is a plain FrameIndex, set alignment flags based on
17807  // FI alignment.
17808  if (!IsAdd) {
17809  if ((FrameIndexAlign % 4) == 0)
17810  FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17811  if ((FrameIndexAlign % 16) == 0)
17812  FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17813  }
17814 }
17815 
17816 /// Given a node, compute flags that are used for address computation when
17817 /// selecting load and store instructions. The flags computed are stored in
17818 /// FlagSet. This function takes into account whether the node is a constant,
17819 /// an ADD, OR, or a constant, and computes the address flags accordingly.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Set the alignment flags for the node depending on if the node is
  // 4-byte or 16-byte aligned.
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // The address itself is a constant.
    // All 32-bit constants can be computed as LIS + Disp.
    const APInt &ConstImm = CN->getAPIntValue();
    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
      FlagSet |= PPC::MOF_AddrIsSImm32;
      SetAlignFlagsForImm(ConstImm.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    }
    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
      FlagSet |= PPC::MOF_RPlusSImm34;
    else // Let constant materialization handle large constants.
      FlagSet |= PPC::MOF_NotAddNorCst;
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
    // This address can be represented as an addition of:
    // - Register + Imm16 (possibly a multiple of 4/16)
    // - Register + Imm34
    // - Register + PPCISD::Lo
    // - Register + Register
    // In any case, we won't have to match this as Base + Zero.
    SDValue RHS = N.getOperand(1);
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
      const APInt &ConstImm = CN->getAPIntValue();
      if (ConstImm.isSignedIntN(16)) {
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
        SetAlignFlagsForImm(ConstImm.getZExtValue());
        setAlignFlagsForFI(N, FlagSet, DAG);
      }
      if (ConstImm.isSignedIntN(34))
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
      else
        FlagSet |= PPC::MOF_RPlusR; // Register.
    } else if (RHS.getOpcode() == PPCISD::Lo &&
               // Only a PPCISD::Lo whose extra offset operand is zero can
               // be folded into the displacement field.
               !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
    else
      FlagSet |= PPC::MOF_RPlusR;
  } else { // The address computation is not a constant or an addition.
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;
  }
}
17872 
17873 static bool isPCRelNode(SDValue N) {
17874  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
17875  isValidPCRelNode<ConstantPoolSDNode>(N) ||
17876  isValidPCRelNode<GlobalAddressSDNode>(N) ||
17877  isValidPCRelNode<JumpTableSDNode>(N) ||
17878  isValidPCRelNode<BlockAddressSDNode>(N));
17879 }
17880 
17881 /// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
17882 /// the address flags of the load/store instruction that is to be matched.
17883 unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
17884  SelectionDAG &DAG) const {
17885  unsigned FlagSet = PPC::MOF_None;
17886 
17887  // Compute subtarget flags.
17888  if (!Subtarget.hasP9Vector())
17889  FlagSet |= PPC::MOF_SubtargetBeforeP9;
17890  else {
17891  FlagSet |= PPC::MOF_SubtargetP9;
17892  if (Subtarget.hasPrefixInstrs())
17893  FlagSet |= PPC::MOF_SubtargetP10;
17894  }
17895  if (Subtarget.hasSPE())
17896  FlagSet |= PPC::MOF_SubtargetSPE;
17897 
17898  // Check if we have a PCRel node and return early.
17899  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
17900  return FlagSet;
17901 
17902  // If the node is the paired load/store intrinsics, compute flags for
17903  // address computation and return early.
17904  unsigned ParentOp = Parent->getOpcode();
17905  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
17906  (ParentOp == ISD::INTRINSIC_VOID))) {
17907  unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
17908  if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
17909  SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
17910  ? Parent->getOperand(2)
17911  : Parent->getOperand(3);
17912  computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
17913  FlagSet |= PPC::MOF_Vector;
17914  return FlagSet;
17915  }
17916  }
17917 
17918  // Mark this as something we don't want to handle here if it is atomic
17919  // or pre-increment instruction.
17920  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
17921  if (LSB->isIndexed())
17922  return PPC::MOF_None;
17923 
17924  // Compute in-memory type flags. This is based on if there are scalars,
17925  // floats or vectors.
17926  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
17927  assert(MN && "Parent should be a MemSDNode!");
17928  EVT MemVT = MN->getMemoryVT();
17929  unsigned Size = MemVT.getSizeInBits();
17930  if (MemVT.isScalarInteger()) {
17931  assert(Size <= 128 &&
17932  "Not expecting scalar integers larger than 16 bytes!");
17933  if (Size < 32)
17934  FlagSet |= PPC::MOF_SubWordInt;
17935  else if (Size == 32)
17936  FlagSet |= PPC::MOF_WordInt;
17937  else
17938  FlagSet |= PPC::MOF_DoubleWordInt;
17939  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
17940  if (Size == 128)
17941  FlagSet |= PPC::MOF_Vector;
17942  else if (Size == 256) {
17943  assert(Subtarget.pairedVectorMemops() &&
17944  "256-bit vectors are only available when paired vector memops is "
17945  "enabled!");
17946  FlagSet |= PPC::MOF_Vector;
17947  } else
17948  llvm_unreachable("Not expecting illegal vectors!");
17949  } else { // Floating point type: can be scalar, f128 or vector types.
17950  if (Size == 32 || Size == 64)
17951  FlagSet |= PPC::MOF_ScalarFloat;
17952  else if (MemVT == MVT::f128 || MemVT.isVector())
17953  FlagSet |= PPC::MOF_Vector;
17954  else
17955  llvm_unreachable("Not expecting illegal scalar floats!");
17956  }
17957 
17958  // Compute flags for address computation.
17959  computeFlagsForAddressComputation(N, FlagSet, DAG);
17960 
17961  // Compute type extension flags.
17962  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
17963  switch (LN->getExtensionType()) {
17964  case ISD::SEXTLOAD:
17965  FlagSet |= PPC::MOF_SExt;
17966  break;
17967  case ISD::EXTLOAD:
17968  case ISD::ZEXTLOAD:
17969  FlagSet |= PPC::MOF_ZExt;
17970  break;
17971  case ISD::NON_EXTLOAD:
17972  FlagSet |= PPC::MOF_NoExt;
17973  break;
17974  }
17975  } else
17976  FlagSet |= PPC::MOF_NoExt;
17977 
17978  // For integers, no extension is the same as zero extension.
17979  // We set the extension mode to zero extension so we don't have
17980  // to add separate entries in AddrModesMap for loads and stores.
17981  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
17982  FlagSet |= PPC::MOF_ZExt;
17983  FlagSet &= ~PPC::MOF_NoExt;
17984  }
17985 
17986  // If we don't have prefixed instructions, 34-bit constants should be
17987  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
17988  bool IsNonP1034BitConst =
17990  FlagSet) == PPC::MOF_RPlusSImm34;
17991  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
17992  IsNonP1034BitConst)
17993  FlagSet |= PPC::MOF_NotAddNorCst;
17994 
17995  return FlagSet;
17996 }
17997 
17998 /// SelectForceXFormMode - Given the specified address, force it to be
17999 /// represented as an indexed [r+r] operation (an XForm instruction).
18001  SDValue &Base,
18002  SelectionDAG &DAG) const {
18003 
18005  int16_t ForceXFormImm = 0;
18006  if (provablyDisjointOr(DAG, N) &&
18007  !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18008  Disp = N.getOperand(0);
18009  Base = N.getOperand(1);
18010  return Mode;
18011  }
18012 
18013  // If the address is the result of an add, we will utilize the fact that the
18014  // address calculation includes an implicit add. However, we can reduce
18015  // register pressure if we do not materialize a constant just for use as the
18016  // index register. We only get rid of the add if it is not an add of a
18017  // value and a 16-bit signed constant and both have a single use.
18018  if (N.getOpcode() == ISD::ADD &&
18019  (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18020  !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18021  Disp = N.getOperand(0);
18022  Base = N.getOperand(1);
18023  return Mode;
18024  }
18025 
18026  // Otherwise, use R0 as the base register.
18027  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18028  N.getValueType());
18029  Base = N;
18030 
18031  return Mode;
18032 }
18033 
18035  SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18036  unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18037  EVT ValVT = Val.getValueType();
18038  // If we are splitting a scalar integer into f64 parts (i.e. so they
18039  // can be placed into VFRC registers), we need to zero extend and
18040  // bitcast the values. This will ensure the value is placed into a
18041  // VSR using direct moves or stack operations as needed.
18042  if (PartVT == MVT::f64 &&
18043  (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18044  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18045  Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18046  Parts[0] = Val;
18047  return true;
18048  }
18049  return false;
18050 }
18051 
// Lowers Op to a call to the external library function LibCallName, passing
// Op's operands as call arguments and returning the call's result value.
// Sign/zero extension of the result and of each argument is chosen via
// shouldSignExtendTypeInLibCall, and the call is emitted as a tail call when
// it is in tail position and the return types are compatible.
18052 SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18053  SelectionDAG &DAG) const {
18054  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// NOTE(review): original line 18055 was dropped by extraction here —
// presumably the declaration of the CallLoweringInfo `CLI` used below;
// confirm against upstream.
18056  EVT RetVT = Op.getValueType();
18057  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18058  SDValue Callee =
18059  DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()))
18060  bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
// NOTE(review): original lines 18061-18062 were dropped by extraction —
// presumably the declarations of `Args` and `Entry` used in the loop below;
// confirm against upstream.
18063  for (const SDValue &N : Op->op_values()) {
18064  EVT ArgVT = N.getValueType();
18065  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18066  Entry.Node = N;
18067  Entry.Ty = ArgTy;
// Each argument's extension kind follows the libcall convention; IsZExt is
// kept as the exact complement of IsSExt.
18068  Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18069  Entry.IsZExt = !Entry.IsSExt;
18070  Args.push_back(Entry);
18071  }
18072 
18073  SDValue InChain = DAG.getEntryNode();
18074  SDValue TCChain = InChain;
18075  const Function &F = DAG.getMachineFunction().getFunction();
// Tail-call only when the node is in tail position AND the caller's return
// type matches (or the caller returns void).
18076  bool isTailCall =
18077  TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18078  (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18079  if (isTailCall)
18080  InChain = TCChain;
18081  CLI.setDebugLoc(SDLoc(Op))
18082  .setChain(InChain)
18083  .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18084  .setTailCall(isTailCall)
18085  .setSExtResult(SignExtend)
18086  .setZExtResult(!SignExtend)
// NOTE(review): original line 18087 was dropped by extraction — the final
// setter in this chain plus the terminating semicolon; confirm upstream.
18088  return TLI.LowerCallTo(CLI).first;
18089 }
18090 
18091 SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18092  const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18093  SelectionDAG &DAG) const {
18094  if (Op.getValueType() == MVT::f32)
18095  return lowerToLibCall(LibCallFloatName, Op, DAG);
18096 
18097  if (Op.getValueType() == MVT::f64)
18098  return lowerToLibCall(LibCallDoubleName, Op, DAG);
18099 
18100  return SDValue();
18101 }
18102 
18103 bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18104  SDNodeFlags Flags = Op.getNode()->getFlags();
18105  return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18106  Flags.hasNoNaNs() && Flags.hasNoInfs();
18107 }
18108 
18109 bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18110  return Op.getNode()->getFlags().hasApproximateFuncs();
18111 }
18112 
// Returns whether scalar math calls may be converted to MASS library entry
// points.
// NOTE(review): the body line (original line 18114) was dropped by
// extraction — presumably a single `return` of a target-option/flag query;
// confirm against upstream before editing.
18113 bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18115 }
18116 
18117 SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18118  const char *LibCallFloatName,
18119  const char *LibCallDoubleNameFinite,
18120  const char *LibCallFloatNameFinite,
18121  SDValue Op,
18122  SelectionDAG &DAG) const {
18123  if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18124  return SDValue();
18125 
18126  if (!isLowringToMASSFiniteSafe(Op))
18127  return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18128  DAG);
18129 
18130  return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18131  LibCallDoubleNameFinite, Op, DAG);
18132 }
18133 
18134 SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18135  return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18136  "__xl_powf_finite", Op, DAG);
18137 }
18138 
18139 SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18140  return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18141  "__xl_sinf_finite", Op, DAG);
18142 }
18143 
18144 SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18145  return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18146  "__xl_cosf_finite", Op, DAG);
18147 }
18148 
18149 SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18150  return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18151  "__xl_logf_finite", Op, DAG);
18152 }
18153 
18154 SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18155  return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18156  "__xl_log10f_finite", Op, DAG);
18157 }
18158 
18159 SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18160  return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18161  "__xl_expf_finite", Op, DAG);
18162 }
18163 
18164 // If we happen to match to an aligned D-Form, check if the Frame Index is
18165 // adequately aligned. If it is not, reset the mode to match to X-Form.
18166 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18167  PPC::AddrMode &Mode) {
18168  if (!isa<FrameIndexSDNode>(N))
18169  return;
18170  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18171  (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18172  Mode = PPC::AM_XForm;
18173 }
18174 
18175 /// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
18176 /// compute the address flags of the node, get the optimal address mode based
18177 /// on the flags, and set the Base and Disp based on the address mode.
// NOTE(review): the opening signature line (original line 18178) was dropped
// by extraction — presumably `PPC::AddrMode PPCTargetLowering::
// SelectOptimalAddrMode(const SDNode *Parent,`; confirm against upstream.
18179  SDValue N, SDValue &Disp,
18180  SDValue &Base,
18181  SelectionDAG &DAG,
18182  MaybeAlign Align) const {
18183  SDLoc DL(Parent);
18184 
18185  // Compute the address flags.
18186  unsigned Flags = computeMOFlags(Parent, N, DAG);
18187 
18188  // Get the optimal address mode based on the Flags.
18189  PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18190 
18191  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18192  // Select an X-Form load if it is not.
18193  setXFormForUnalignedFI(N, Flags, Mode);
18194 
18195  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18196  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18197  assert(Subtarget.isUsingPCRelativeCalls() &&
18198  "Must be using PC-Relative calls when a valid PC-Relative node is "
18199  "present!");
18200  Mode = PPC::AM_PCRel;
18201  }
18202 
18203  // Set Base and Disp accordingly depending on the address mode.
18204  switch (Mode) {
18205  case PPC::AM_DForm:
18206  case PPC::AM_DSForm:
18207  case PPC::AM_DQForm: {
18208  // This is a register plus a 16-bit immediate. The base will be the
18209  // register and the displacement will be the immediate unless it
18210  // isn't sufficiently aligned.
18211  if (Flags & PPC::MOF_RPlusSImm16) {
18212  SDValue Op0 = N.getOperand(0);
18213  SDValue Op1 = N.getOperand(1);
18214  int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
18215  if (!Align || isAligned(*Align, Imm)) {
18216  Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18217  Base = Op0;
// Frame-index bases are rewritten to target frame indices so the frame
// lowering fixups can find them.
18218  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18219  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18220  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18221  }
18222  break;
18223  }
18224  }
18225  // This is a register plus the @lo relocation. The base is the register
18226  // and the displacement is the global address.
18227  else if (Flags & PPC::MOF_RPlusLo) {
18228  Disp = N.getOperand(1).getOperand(0); // The global address.
// NOTE(review): original lines 18229-18230 were dropped by extraction —
// presumably an assert that Disp's opcode is one of the target-address
// opcodes checked below; confirm against upstream.
18231  Disp.getOpcode() == ISD::TargetConstantPool ||
18232  Disp.getOpcode() == ISD::TargetJumpTable);
18233  Base = N.getOperand(0);
18234  break;
18235  }
18236  // This is a constant address at most 32 bits. The base will be
18237  // zero or load-immediate-shifted and the displacement will be
18238  // the low 16 bits of the address.
18239  else if (Flags & PPC::MOF_AddrIsSImm32) {
18240  auto *CN = cast<ConstantSDNode>(N);
18241  EVT CNType = CN->getValueType(0);
18242  uint64_t CNImm = CN->getZExtValue();
18243  // If this address fits entirely in a 16-bit sext immediate field, codegen
18244  // this as "d, 0".
18245  int16_t Imm;
18246  if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18247  Disp = DAG.getTargetConstant(Imm, DL, CNType);
18248  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18249  CNType);
18250  break;
18251  }
18252  // Handle 32-bit sext immediate with LIS + Addr mode.
18253  if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18254  (!Align || isAligned(*Align, CNImm))) {
18255  int32_t Addr = (int32_t)CNImm;
18256  // Otherwise, break this down into LIS + Disp.
18257  Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
// Subtracting the sign-extended low 16 bits compensates for the sign
// extension the hardware applies to the displacement.
18258  Base =
18259  DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18260  uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18261  Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18262  break;
18263  }
18264  }
18265  // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
18266  Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18267  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18268  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18269  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18270  } else
18271  Base = N;
18272  break;
18273  }
18274  case PPC::AM_PrefixDForm: {
18275  int64_t Imm34 = 0;
18276  unsigned Opcode = N.getOpcode();
18277  if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18278  (isIntS34Immediate(N.getOperand(1), Imm34))) {
18279  // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
18280  Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18281  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18282  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18283  else
18284  Base = N.getOperand(0);
18285  } else if (isIntS34Immediate(N, Imm34)) {
18286  // The address is a 34-bit signed immediate.
18287  Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18288  Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18289  }
18290  break;
18291  }
18292  case PPC::AM_PCRel: {
18293  // When selecting PC-Relative instructions, "Base" is not utilized as
18294  // we select the address as [PC+imm].
18295  Disp = N;
18296  break;
18297  }
18298  case PPC::AM_None:
18299  break;
18300  default: { // By default, X-Form is always available to be selected.
18301  // When a frame index is not aligned, we also match by XForm.
18302  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18303  Base = FI ? N : N.getOperand(1);
18304  Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18305  N.getValueType())
18306  : N.getOperand(0);
18307  break;
18308  }
18309  }
18310  return Mode;
18311 }
18312 
// Selects the calling-convention assignment function for the 64-bit ELF
// ABI: cold calls use a dedicated return-value convention, everything else
// uses the common 64-bit ELF fast-isel convention.
// NOTE(review): the opening signature line (original line 18313) was dropped
// by extraction — the function name and the CallingConv::ID parameter are
// not visible here; confirm against upstream.
18314  bool Return,
18315  bool IsVarArg) const {
18316  switch (CC) {
18317  case CallingConv::Cold:
// Cold calls only diverge on the return side; argument assignment is shared.
18318  return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF_FIS);
18319  default:
18320  return CC_PPC64_ELF_FIS;
18321  }
18322 }
18323 
// Returns true when 128-bit (quadword) atomic operations may be expanded
// inline: requires a 64-bit subtarget with quadword-atomic support, and on
// AIX additionally requires the EnableQuadwordAtomics override.
// NOTE(review): the signature line (original line 18324) was dropped by
// extraction — confirm the exact declaration against upstream.
18325  // TODO: 16-byte atomic type support for AIX is in progress; we should be able
18326  // to inline 16-byte atomic ops on AIX too in the future.
18327  return Subtarget.isPPC64() &&
18328  (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
18329  Subtarget.hasQuadwordAtomics();
18330 }
18331 
// Chooses the atomic expansion kind for an atomicrmw instruction; 128-bit
// operations are handled specially when quadword atomics can be inlined.
// NOTE(review): extraction dropped several lines of this function (the
// signature at 18332-18333, the return at 18336, and the switch cases /
// returns at 18339-18341 and 18343) — confirm against upstream before
// editing; the visible control flow below is incomplete.
18334  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18335  if (shouldInlineQuadwordAtomics() && Size == 128)
18337 
18338  switch (AI->getOperation()) {
18342  default:
18344  }
18345 
// All switch cases return, so falling out of the switch is impossible.
18346  llvm_unreachable("unreachable atomicrmw operation");
18347 }
18348 
// Chooses the atomic expansion kind for a cmpxchg instruction; 128-bit
// compare-exchange is handled specially when quadword atomics can be
// inlined.
// NOTE(review): extraction dropped lines of this function (signature at
// 18349-18350 and the return statements at 18353-18354) — confirm against
// upstream before editing.
18351  unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18352  if (shouldInlineQuadwordAtomics() && Size == 128)
18355 }
18356 
// Maps an AtomicRMW binary operation to the corresponding PPC 128-bit
// atomicrmw intrinsic. Operations without an i128 intrinsic are unreachable
// because the caller only requests inlinable quadword operations.
// NOTE(review): the line carrying this helper's name and parameter
// (original line 18358) was dropped by extraction — confirm the exact
// declaration against upstream.
18357 static Intrinsic::ID
18359  switch (BinOp) {
18360  default:
18361  llvm_unreachable("Unexpected AtomicRMW BinOp");
18362  case AtomicRMWInst::Xchg:
18363  return Intrinsic::ppc_atomicrmw_xchg_i128;
18364  case AtomicRMWInst::Add:
18365  return Intrinsic::ppc_atomicrmw_add_i128;
18366  case AtomicRMWInst::Sub:
18367  return Intrinsic::ppc_atomicrmw_sub_i128;
18368  case AtomicRMWInst::And:
18369  return Intrinsic::ppc_atomicrmw_and_i128;
18370  case AtomicRMWInst::Or:
18371  return Intrinsic::ppc_atomicrmw_or_i128;
18372  case AtomicRMWInst::Xor:
18373  return Intrinsic::ppc_atomicrmw_xor_i128;
18374  case AtomicRMWInst::Nand:
18375  return Intrinsic::ppc_atomicrmw_nand_i128;
18376  }
18377 }
18378 
// Emits IR for a masked 128-bit atomicrmw: splits the 128-bit increment into
// two i64 halves, calls the PPC i128 atomicrmw intrinsic, and reassembles
// the {lo, hi} result pair into a single 128-bit value.
// NOTE(review): extraction dropped the opening signature line (original line
// 18379) and lines 18386-18387 — presumably the declaration of the `RMW`
// intrinsic callee used below (via getIntrinsicForAtomicRMWBinOp128);
// confirm against upstream.
18380  IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18381  Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18382  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18383  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18384  Type *ValTy = Incr->getType();
18385  assert(ValTy->getPrimitiveSizeInBits() == 128);
18388  Type *Int64Ty = Type::getInt64Ty(M->getContext());
// Split the 128-bit increment into low/high 64-bit halves for the intrinsic.
18389  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18390  Value *IncrHi =
18391  Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18392  Value *Addr =
18393  Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18394  Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
18395  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18396  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
// Widen each half back to 128 bits and recombine: result = lo | (hi << 64).
18397  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18398  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18399  return Builder.CreateOr(
18400  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18401 }
18402 
// Emits IR for a masked 128-bit cmpxchg: splits the compare and new values
// into i64 halves, calls the PPC i128 cmpxchg intrinsic between the leading
// and trailing fences required by the memory ordering, and reassembles the
// {lo, hi} result pair into a single 128-bit value.
// NOTE(review): the opening signature line (original line 18403) was dropped
// by extraction — confirm the exact declaration against upstream.
18404  IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18405  Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18406  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18407  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18408  Type *ValTy = CmpVal->getType();
18409  assert(ValTy->getPrimitiveSizeInBits() == 128);
18410  Function *IntCmpXchg =
18411  Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18412  Type *Int64Ty = Type::getInt64Ty(M->getContext());
// Split both the expected and replacement values into 64-bit halves.
18413  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18414  Value *CmpHi =
18415  Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18416  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18417  Value *NewHi =
18418  Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18419  Value *Addr =
18420  Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
// Ordering semantics come from the explicit fences around the call.
18421  emitLeadingFence(Builder, CI, Ord);
18422  Value *LoHi =
18423  Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
18424  emitTrailingFence(Builder, CI, Ord);
18425  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18426  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
// Widen each half back to 128 bits and recombine: result = lo | (hi << 64).
18427  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18428  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18429  return Builder.CreateOr(
18430  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18431 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::PPCISD::READ_TIME_BASE
@ READ_TIME_BASE
Definition: PPCISelLowering.h:269
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:916
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:130
llvm::SelectionDAG::getMemcpy
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
Definition: SelectionDAG.cpp:7458
llvm::PPCRegisterInfo
Definition: PPCRegisterInfo.h:57
i
i
Definition: README.txt:29
llvm::ISD::STRICT_FP_ROUND
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:464
llvm::PPCII::MO_TLSGD_FLAG
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:128
llvm::PPCISD::MTCTR
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
Definition: PPCISelLowering.h:194
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1447
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:461
llvm::PPCII::MO_GOT_FLAG
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set the symbol reference is to be computed via the GOT.
Definition: PPC.h:119
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
llvm::TargetLoweringBase::MaxStoresPerMemsetOptSize
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3438
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:36
ValueTypes.h
llvm::APFloat::isDenormal
bool isDenormal() const
Definition: APFloat.h:1260
llvm::InlineAsm::Kind_Imm
@ Kind_Imm
Definition: InlineAsm.h:244
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::PPC::isXXINSERTWMask
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
Definition: PPCISelLowering.cpp:2228
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1586
Lowering
Shadow Stack GC Lowering
Definition: ShadowStackGCLowering.cpp:99
DisableAutoPairedVecSt
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
llvm::PPCTargetLowering::getPICJumpTableRelocBase
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Definition: PPCISelLowering.cpp:3194
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2398
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:475
llvm::RISCVAttrs::StackAlign
StackAlign
Definition: RISCVAttributes.h:37
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
llvm::isConstOrConstSplat
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
Definition: SelectionDAG.cpp:11031
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2309
llvm::ISD::VECTOR_SHUFFLE
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:586
llvm::PPCSubtarget::hasPOPCNTD
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:206
llvm::SDValue::dump
void dump() const
Definition: SelectionDAGNodes.h:1193
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4884
llvm::TargetLoweringBase::AddPromotedToType
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
Definition: TargetLowering.h:2497
llvm::XCOFF::XTY_ER
@ XTY_ER
External reference.
Definition: XCOFF.h:240
llvm::SelectionDAG::getCALLSEQ_START
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:1022
llvm::ISD::isSignedIntSetCC
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1467
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1458
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1154
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1443
llvm::SDUse
Represents a use of a SDNode.
Definition: SelectionDAGNodes.h:284
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1383
llvm::PPCII::MO_PLT
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:106
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm::PPCSubtarget::getRegisterInfo
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
llvm::PPCTargetLowering::getJumpTableEncoding
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
Definition: PPCISelLowering.cpp:3179
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::PPC::isXXSLDWIShuffleMask
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
Definition: PPCISelLowering.cpp:2303
llvm::TargetLowering::getSqrtResultForDenormInput
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
Definition: TargetLowering.h:4845
llvm::TargetOptions::GuaranteedTailCallOpt
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
Definition: TargetOptions.h:221
llvm::ISD::JumpTable
@ JumpTable
Definition: ISDOpcodes.h:81
llvm::PPC::PRED_LT
@ PRED_LT
Definition: PPCPredicates.h:27
llvm::PPCFunctionInfo::appendParameterType
void appendParameterType(ParamType Type)
Definition: PPCMachineFunctionInfo.cpp:76
llvm::PPCISD::FCTIDZ
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
Definition: PPCISelLowering.h:73
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: MachineValueType.h:1140
llvm::CC_PPC32_SVR4_VarArg
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::PATCHPOINT
@ PATCHPOINT
Definition: ISDOpcodes.h:1303
llvm::MachineFrameInfo::hasVAStart
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
Definition: MachineFrameInfo.h:630
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:986
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
llvm::MCSectionXCOFF
Definition: MCSectionXCOFF.h:32
llvm::SectionKind::getMetadata
static SectionKind getMetadata()
Definition: SectionKind.h:188
llvm::PICLevel::SmallPIC
@ SmallPIC
Definition: CodeGen.h:36
llvm::PPCISD::FSQRT
@ FSQRT
Square root instruction.
Definition: PPCISelLowering.h:97
CalculateTailCallArgDest
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
Definition: PPCISelLowering.cpp:5035
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1106
llvm::PPCSubtarget::usesFunctionDescriptors
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:249
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:34
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:197
PPCRegisterInfo.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:833
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: MachineValueType.h:376
llvm::TargetLowering::CallLoweringInfo::IsPatchPoint
bool IsPatchPoint
Definition: TargetLowering.h:4208
llvm::TargetMachine::useEmulatedTLS
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Definition: TargetMachine.cpp:146
llvm::PPCISD::STORE_COND
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
Definition: PPCISelLowering.h:602
llvm::PPCISD::BDNZ
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
Definition: PPCISelLowering.h:298
llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:43
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
EmitTailCallStoreFPAndRetAddr
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
Definition: PPCISelLowering.cpp:5011
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:213
llvm::ISD::SETGT
@ SETGT
Definition: ISDOpcodes.h:1455
llvm::PPCISD::RFEBB
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
Definition: PPCISelLowering.h:430
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:885
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1459
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:157
llvm::PPC::isXXBRDShuffleMask
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
Definition: PPCISelLowering.cpp:2386
llvm::TargetLowering::getSingleConstraintMatchWeight
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Definition: TargetLowering.cpp:5604
llvm::MachineFrameInfo::setReturnAddressIsTaken
void setReturnAddressIsTaken(bool s)
Definition: MachineFrameInfo.h:378
llvm::TargetLowering::ConstraintType
ConstraintType
Definition: TargetLowering.h:4620
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:990
llvm::MachineModuleInfo::getContext
const MCContext & getContext() const
Definition: MachineModuleInfo.h:139
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::KnownBits::resetAll
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
llvm::SelectionDAG::addNoMergeSiteInfo
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
Definition: SelectionDAG.h:2274
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
llvm::ConstantSDNode::getAPIntValue
const APInt & getAPIntValue() const
Definition: SelectionDAGNodes.h:1600
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1414
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::PPCFunctionInfo::setTailCallSPDelta
void setTailCallSPDelta(int size)
Definition: PPCMachineFunctionInfo.h:184
AtomicOrdering.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:168
llvm::PPCTargetLowering::getPreIndexedAddressParts
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
Definition: PPCISelLowering.cpp:2995
llvm::APInt::isSignedIntN
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:427
llvm::PPCFunctionInfo::getMinReservedArea
unsigned getMinReservedArea() const
Definition: PPCMachineFunctionInfo.h:180
llvm::APFloatBase::IEEEsingle
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:244
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:942
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:769
llvm::ISD::AssertSext
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
llvm::PPCISD::FP_EXTEND_HALF
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
Definition: PPCISelLowering.h:460
llvm::PPC::AM_PrefixDForm
@ AM_PrefixDForm
Definition: PPCISelLowering.h:743
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:423
llvm::EVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:348
llvm::PPCISD::VABSD
@ VABSD
An SDNode for Power9 vector absolute value difference.
Definition: PPCISelLowering.h:456
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51
llvm::HexagonISD::JT
@ JT
Definition: HexagonISelLowering.h:52
llvm::MVT::ppcf128
@ ppcf128
Definition: MachineValueType.h:61
T
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:159
llvm::isIntS34Immediate
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
Definition: PPCISelLowering.cpp:2635
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:76
llvm::Function
Definition: Function.h:59
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:542
llvm::PPCISD::CLRBHRB
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
Definition: PPCISelLowering.h:423
llvm::PPCII::MO_GOT_TLSLD_PCREL_FLAG
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:150
StringRef.h
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:700
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:243
llvm::ISD::STRICT_UINT_TO_FP
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:449
llvm::PPC::PRED_GE
@ PRED_GE
Definition: PPCPredicates.h:30
computeFlagsForAddressComputation
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
Definition: PPCISelLowering.cpp:17820
llvm::PPCFunctionInfo::getVarArgsFrameIndex
int getVarArgsFrameIndex() const
Definition: PPCMachineFunctionInfo.h:224
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1015
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:424
llvm::MVT::i128
@ i128
Definition: MachineValueType.h:50
llvm::PPCFunctionInfo::VectorChar
@ VectorChar
Definition: PPCMachineFunctionInfo.h:30
llvm::PPC::AM_DForm
@ AM_DForm
Definition: PPCISelLowering.h:740
llvm::ISD::DYNAMIC_STACKALLOC
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:975
llvm::AtomicRMWInst::Xor
@ Xor
*p = old ^ v
Definition: Instructions.h:744
DisablePPCPreinc
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1884
llvm::CCState::addLoc
void addLoc(const CCValAssign &V)
Definition: CallingConvLower.h:230
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:432
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
DM
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
OP_COPY
@ OP_COPY
Definition: ARMISelLowering.cpp:8311
llvm::PPC::MOF_SubtargetP9
@ MOF_SubtargetP9
Definition: PPCISelLowering.h:732
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::ISD::INIT_TRAMPOLINE
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1123
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:341
llvm::TargetLoweringBase::MaxStoresPerMemset
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
Definition: TargetLowering.h:3436
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:46
llvm::PPCTargetLowering::shouldExpandAtomicRMWInIR
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition: PPCISelLowering.cpp:18333
llvm::ISD::FSHL
@ FSHL
Definition: ISDOpcodes.h:696
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:31
llvm::AtomicRMWInst::getOperation
BinOp getOperation() const
Definition: Instructions.h:812
CC_AIX
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
Definition: PPCISelLowering.cpp:6592
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:730
llvm::SelectionDAG::getFrameIndex
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
Definition: SelectionDAG.cpp:1765
uses
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the uses
Definition: README-SSE.txt:258
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::TargetLoweringBase::isOperationCustom
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
Definition: TargetLowering.h:1228
Statistic.h
getIntrinsicForAtomicRMWBinOp128
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
Definition: PPCISelLowering.cpp:18358
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1454
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:392
llvm::PPCTargetLowering::isAccessedAsGotIndirect
bool isAccessedAsGotIndirect(SDValue N) const
Definition: PPCISelLowering.cpp:16609
llvm::ISD::STACKRESTORE
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1056
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9508
llvm::PPCISD::MTVSRA
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
Definition: PPCISelLowering.h:225
llvm::PPCISD::SEXT_LD_SPLAT
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
Definition: PPCISelLowering.h:578
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:469
DisablePerfectShuffle
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:54
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2901
llvm::PPC::AM_None
@ AM_None
Definition: PPCISelLowering.h:739
llvm::SelectionDAG::isSplatValue
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
Definition: SelectionDAG.cpp:2611
ErrorHandling.h
llvm::Sched::ILP
@ ILP
Definition: TargetLowering.h:103
llvm::PPCSubtarget::isLittleEndian
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::PPC::MOF_NotAddNorCst
@ MOF_NotAddNorCst
Definition: PPCISelLowering.h:712
llvm::PPCFunctionInfo::addLiveInAttr
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
Definition: PPCMachineFunctionInfo.h:248
llvm::PPCISD::MAT_PCREL_ADDR
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
Definition: PPCISelLowering.h:465
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::PPCISD::ACC_BUILD
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
Definition: PPCISelLowering.h:478
llvm::PPCTargetLowering::expandVSXLoadForLE
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
Definition: PPCISelLowering.cpp:14707
llvm::XCOFF::XMC_PR
@ XMC_PR
Program Code.
Definition: XCOFF.h:104
llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
llvm::TargetLowering::lowerCmpEqZeroToCtlzSrl
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:9477
llvm::PPCSubtarget::getFrameLowering
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
llvm::PPCCCState
Definition: PPCCCState.h:19
llvm::GlobalAlias
Definition: GlobalAlias.h:28
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1355
llvm::PPCSubtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:208
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:919
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1378
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::TargetLowering::getPICJumpTableRelocBase
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
Definition: TargetLowering.cpp:454
llvm::PPCInstrInfo
Definition: PPCInstrInfo.h:212
llvm::PPCISD::VPERM
@ VPERM
VPERM - The PPC VPERM Instruction.
Definition: PPCISelLowering.h:101
llvm::ISD::USUBSAT
@ USUBSAT
Definition: ISDOpcodes.h:350
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:463
LowerLabelRef
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:3095
llvm::PPCTargetLowering::getOptimalMemOpType
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
Definition: PPCISelLowering.cpp:16798
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
MachineBasicBlock.h
llvm::GlobalAddressSDNode::getTargetFlags
unsigned getTargetFlags() const
Definition: SelectionDAGNodes.h:1777
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:819
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3939
llvm::PPC::MOF_RPlusSImm16Mult16
@ MOF_RPlusSImm16Mult16
Definition: PPCISelLowering.h:716
llvm::PPCISD::STXSIX
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
Definition: PPCISelLowering.h:546
llvm::TargetLoweringBase::getPrefLoopAlignment
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
Definition: TargetLoweringBase.cpp:2019
llvm::SelectionDAG::ReplaceAllUsesWith
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
Definition: SelectionDAG.cpp:10380
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138
llvm::PPCTargetLowering::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
Definition: PPCISelLowering.cpp:16641
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2348
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:282
llvm::MemOp
Definition: TargetLowering.h:112
llvm::PPCTargetLowering::getSingleConstraintMatchWeight
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
Definition: PPCISelLowering.cpp:16194
llvm::PPC::isSplatShuffleMask
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
Definition: PPCISelLowering.cpp:2156
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:745
R4
#define R4(n)
llvm::TargetLoweringBase::shouldExpandAtomicRMWInIR
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition: TargetLowering.h:2157
APInt.h
llvm::PPCISD::BUILD_SPE64
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
Definition: PPCISelLowering.h:237
areCallingConvEligibleForTCO_64SVR4
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
Definition: PPCISelLowering.cpp:4824
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:236
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::TargetLowering::isPositionIndependent
bool isPositionIndependent() const
Definition: TargetLowering.cpp:46
llvm::CallBase::isStrictFP
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1864
llvm::TargetLowering::C_Memory
@ C_Memory
Definition: TargetLowering.h:4623
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:315
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:799
Shift
bool Shift
Definition: README.txt:468
DisablePPCUnaligned
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1449
llvm::PPCFrameLowering::getTOCSaveOffset
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
Definition: PPCFrameLowering.cpp:2698
llvm::PPC::isXXPERMDIShuffleMask
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
Definition: PPCISelLowering.cpp:2402
MachineJumpTableInfo.h
llvm::TargetMachine::getRelocationModel
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
Definition: TargetMachine.cpp:68
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::RTLIB::Libcall
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Definition: RuntimeLibcalls.h:30
llvm::PPCISD::FADDRTZ
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
Definition: PPCISelLowering.h:304
DenseMap.h
llvm::KnownBits::getConstant
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
llvm::BranchProbability::getZero
static BranchProbability getZero()
Definition: BranchProbability.h:49
Module.h
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::PPCISD::Lo
@ Lo
Definition: PPCISelLowering.h:138
llvm::PPCII::MO_LO
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:161
llvm::PPCTargetLowering::ccAssignFnForCall
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
Definition: PPCISelLowering.cpp:18313
llvm::PPCTargetLowering::getByValTypeAlignment
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
Definition: PPCISelLowering.cpp:1594
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:749
llvm::AttributeList
Definition: Attributes.h:432
llvm::tgtok::Bits
@ Bits
Definition: TGLexer.h:50
TargetInstrInfo.h
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1380
llvm::PPCSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Scheduling customization.
Definition: PPCSubtarget.cpp:130
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::PPC::DIR_E500
@ DIR_E500
Definition: PPCSubtarget.h:51
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:8168
llvm::PPCISD::XXSPLT
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
Definition: PPCISelLowering.h:105
llvm::PPCISD::LD_GOT_TPREL_L
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
Definition: PPCISelLowering.h:337
llvm::PPCISD::FCFIDU
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
Definition: PPCISelLowering.h:66
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
isConstantOrUndef
static bool isConstantOrUndef(int Op, int Val)
isConstantOrUndef - Op is either an undef node or a ConstantSDNode.
Definition: PPCISelLowering.cpp:1823
llvm::MachineRegisterInfo::getLiveInVirtReg
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
Definition: MachineRegisterInfo.cpp:458
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4814
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:889
llvm::PPCISD::STRICT_FCTIDZ
@ STRICT_FCTIDZ
Definition: PPCISelLowering.h:493
llvm::PPCISD::FNMSUB
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
Definition: PPCISelLowering.h:172
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
mapArgRegToOffsetAIX
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
Definition: PPCISelLowering.cpp:6865
llvm::PPCTargetLowering::computeKnownBitsForTargetNode
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition: PPCISelLowering.cpp:16054
llvm::PPCISD::XXSPLTI32DX
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
Definition: PPCISelLowering.h:114
llvm::ISD::CALLSEQ_START
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1070
llvm::PPCII::MO_GOT_TLSGD_PCREL_FLAG
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:145
llvm::CCState::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: CallingConvLower.h:235
llvm::PPC::MOF_RPlusSImm34
@ MOF_RPlusSImm34
Definition: PPCISelLowering.h:717
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::PPCFunctionInfo::setVarArgsNumGPR
void setVarArgsNumGPR(unsigned Num)
Definition: PPCMachineFunctionInfo.h:231
llvm::SelectionDAG::getSplatBuildVector
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:841
llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
llvm::PPC::AM_DQForm
@ AM_DQForm
Definition: PPCISelLowering.h:742
llvm::CallBase::isMustTailCall
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: Instructions.cpp:309
llvm::PPCTargetLowering::emitEHSjLjSetJmp
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:11925
llvm::TargetLowering::CallLoweringInfo::CB
const CallBase * CB
Definition: TargetLowering.h:4225
llvm::InlineAsm::Kind_RegDef
@ Kind_RegDef
Definition: InlineAsm.h:241
llvm::TargetLoweringBase::setMinFunctionAlignment
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
Definition: TargetLowering.h:2519
llvm::TargetLowering::LowerCallTo
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
Definition: SelectionDAGBuilder.cpp:9869
FPR
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
Definition: PPCISelLowering.cpp:3884
llvm::PPCFunctionInfo
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
Definition: PPCMachineFunctionInfo.h:24
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1275
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::PPC::AM_DSForm
@ AM_DSForm
Definition: PPCISelLowering.h:741
llvm::PPCISD::DYNALLOC
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
Definition: PPCISelLowering.h:146
llvm::PPCSubtarget::getTargetMachine
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
llvm::TargetLoweringBase::isJumpTableRelative
virtual bool isJumpTableRelative() const
Definition: TargetLoweringBase.cpp:2015
llvm::ISD::FMAXNUM_IEEE
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:950
fixupShuffleMaskForPermutedSToV
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:14947
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:769
llvm::TargetLoweringBase::getVectorIdxTy
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
Definition: TargetLowering.h:419
llvm::PPCTargetLowering::EmitPartwordAtomicBinary
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
Definition: PPCISelLowering.cpp:11711
llvm::SelectionDAG::getCommutedVectorShuffle
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
Definition: SelectionDAG.cpp:2137
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:899
llvm::codeview::EncodedFramePtrReg::StackPtr
@ StackPtr
getVectorCompareInfo
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
Definition: PPCISelLowering.cpp:10299
STLExtras.h
llvm::PPCFunctionInfo::VectorInt
@ VectorInt
Definition: PPCMachineFunctionInfo.h:32
llvm::ISD::VAEND
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1085
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1326
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1414
llvm::PPC::DIR_A2
@ DIR_A2
Definition: PPCSubtarget.h:50
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::PPCISD::MFBHRBE
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
Definition: PPCISelLowering.h:427
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition: SelectionDAG.cpp:1446
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
llvm::PPC::PRED_GT
@ PRED_GT
Definition: PPCPredicates.h:31
llvm::TargetLoweringBase::shouldSignExtendTypeInLibCall
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
Definition: TargetLowering.h:2109
llvm::minidump::MemoryType
MemoryType
Definition: Minidump.h:98
stripModuloOnShift
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:17202
llvm::BlockAddressSDNode
Definition: SelectionDAGNodes.h:2199
llvm::PPCSubtarget::getStackPointerRegister
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:279
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:190
Format.h
getCallOpcode
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, const Function &Caller, const SDValue &Callee, const PPCSubtarget &Subtarget, const TargetMachine &TM, bool IsStrictFPCall=false)
Definition: PPCISelLowering.cpp:5234
llvm::PPCTargetLowering::emitLeadingFence
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
Definition: PPCISelLowering.cpp:11506
llvm::TargetLoweringBase::emitPatchPoint
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
Definition: TargetLoweringBase.cpp:1165
isStoreConditional
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
Definition: PPCISelLowering.cpp:15250
llvm::PPCTargetLowering::CallFlags::IsPatchPoint
const bool IsPatchPoint
Definition: PPCISelLowering.h:1176
llvm::PPCISD::ZEXT_LD_SPLAT
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that zero-extends.
Definition: PPCISelLowering.h:574
llvm::PPC::MOF_SubtargetP10
@ MOF_SubtargetP10
Definition: PPCISelLowering.h:733
SelectionDAG.h
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::PPCTargetLowering::shouldExpandBuildVectorWithShuffles
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
Definition: PPCISelLowering.cpp:17041
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:470
llvm::PPCISD::LBRX
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
Definition: PPCISelLowering.h:522
DisableILPPref
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
llvm::AtomicRMWInst::UDecWrap
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:774
llvm::PPCSubtarget::is64BitELFABI
bool is64BitELFABI() const
Definition: PPCSubtarget.h:218
llvm::InlineAsm::Kind_RegDefEarlyClobber
@ Kind_RegDefEarlyClobber
Definition: InlineAsm.h:242
Use.h
llvm::PPCISD::FCFID
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
Definition: PPCISelLowering.h:62
llvm::ISD::STRICT_FP_TO_UINT
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:442
llvm::PPCTargetLowering::expandVSXStoreForLE
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
Definition: PPCISelLowering.cpp:14773
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1445
llvm::TypeSize::Fixed
static constexpr TypeSize Fixed(ScalarTy ExactSize)
Definition: TypeSize.h:331
llvm::PPC::isVPKUHUMShuffleMask
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Definition: PPCISelLowering.cpp:1833
CalculateStackSlotSize
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
Definition: PPCISelLowering.cpp:3890
llvm::ISD::SMAX
@ SMAX
Definition: ISDOpcodes.h:661
llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition: TargetLowering.h:2439
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:481
llvm::convertToNonDenormSingle
bool convertToNonDenormSingle(APInt &ArgAPInt)
Definition: PPCISelLowering.cpp:9117
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:911
llvm::CC_PPC64_ELF_FIS
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::PPCISD::BDZ
@ BDZ
Definition: PPCISelLowering.h:299
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::PPCTargetLowering::PPCTargetLowering
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
Definition: PPCISelLowering.cpp:154
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1000
llvm::ISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
MachineRegisterInfo.h
llvm::PPCISD::ADDI_TLSLD_L
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
Definition: PPCISelLowering.h:386
KnownBits.h
llvm::TargetLoweringBase::getShiftAmountTy
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
Definition: TargetLoweringBase.cpp:917
llvm::ShuffleVectorSDNode
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
Definition: SelectionDAGNodes.h:1528
llvm::PPC::MOF_SubtargetSPE
@ MOF_SubtargetSPE
Definition: PPCISelLowering.h:734
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2147
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition: TargetLowering.h:2422
llvm::XCOFF::CsectProperties
Definition: XCOFF.h:470
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::ISD::INLINEASM
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1024
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
fixupFuncForFI
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
Definition: PPCISelLowering.cpp:2708
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:427
llvm::GlobalValue::getSection
StringRef getSection() const
Definition: Globals.cpp:175
MachineValueType.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::TargetLowering::getNegatedExpression
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
Definition: TargetLowering.cpp:6764
llvm::Reloc::Model
Model
Definition: CodeGen.h:25
llvm::ISD::ROTL
@ ROTL
Definition: ISDOpcodes.h:694
llvm::TargetLoweringObjectFile
Definition: TargetLoweringObjectFile.h:45
llvm::PPC::isXXBRWShuffleMask
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
Definition: PPCISelLowering.cpp:2382
PerfectShuffleTable
static const unsigned PerfectShuffleTable[6561+1]
Definition: AArch64PerfectShuffle.h:25
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::PPCISD::UINT_VEC_TO_FP
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
Definition: PPCISelLowering.h:250
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:929
llvm::PPCISD::ANDI_rec_1_GT_BIT
@ ANDI_rec_1_GT_BIT
Definition: PPCISelLowering.h:265
llvm::PPCTargetLowering::CallFlags::HasNest
const bool HasNest
Definition: PPCISelLowering.h:1178
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2511
llvm::CC_PPC32_SVR4_ByVal
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
llvm::TargetLowering::isInTailCallPosition
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
Definition: TargetLowering.cpp:52
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::TargetLowering::CallLoweringInfo::setSExtResult
CallLoweringInfo & setSExtResult(bool Value=true)
Definition: TargetLowering.h:4328
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1525
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:772
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:929
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition: PPCSubtarget.h:61
PPCSubtarget.h
CommandLine.h
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1414
llvm::PPCISD::STXVD2X
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:583
llvm::PPCII::MO_PCREL_FLAG
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:114
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::ISD::STRICT_FDIV
@ STRICT_FDIV
Definition: ISDOpcodes.h:403
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1007
TargetLowering.h
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::Instruction::hasAtomicLoad
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
Definition: Instruction.cpp:681
llvm::BlockAddressSDNode::getOffset
int64_t getOffset() const
Definition: SelectionDAGNodes.h:2213
llvm::ISD::FSHR
@ FSHR
Definition: ISDOpcodes.h:697
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:8118
llvm::PPCISD::STRICT_FCTIDUZ
@ STRICT_FCTIDUZ
Definition: PPCISelLowering.h:495
llvm::PPCTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
Definition: PPCISelLowering.cpp:11297
llvm::PPCISD::STFIWX
@ STFIWX
STFIWX - The STFIWX instruction.
Definition: PPCISelLowering.h:526
R2
#define R2(n)
llvm::PPC::MOF_RPlusLo
@ MOF_RPlusLo
Definition: PPCISelLowering.h:714
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:644
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:362
llvm::TargetFrameLowering::getStackAlignment
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Definition: TargetFrameLowering.h:95
llvm::TargetLowering::CallLoweringInfo::IsVarArg
bool IsVarArg
Definition: TargetLowering.h:4203
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:682
llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:441
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:37
llvm::PPCISD::CR6SET
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
Definition: PPCISelLowering.h:317
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:312
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1187
llvm::PPCFrameLowering::getReturnSaveOffset
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
Definition: PPCFrameLowering.h:149
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
isSplat
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
Definition: LowerMatrixIntrinsics.cpp:111
llvm::TargetOptions::NoInfsFPMath
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
Definition: TargetOptions.h:169
EnsureStackAlignment
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
Definition: PPCISelLowering.cpp:3996
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:728
llvm::ExternalSymbolSDNode
Definition: SelectionDAGNodes.h:2241
llvm::PPCTargetLowering::shouldConvertConstantLoadToIntImm
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Definition: PPCISelLowering.cpp:16818
llvm::PPCISD::TC_RETURN
@ TC_RETURN
TC_RETURN - A tail call return.
Definition: PPCISelLowering.h:314
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
GlobalValue.h
llvm::PPCISD::STRICT_FCFIDUS
@ STRICT_FCFIDUS
Definition: PPCISelLowering.h:502
MachineLoopInfo.h
haveEfficientBuildVectorPattern
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
Definition: PPCISelLowering.cpp:9026
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:722
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1141
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
llvm::PPCTargetLowering::preferIncOfAddToSubOfNot
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
Definition: PPCISelLowering.cpp:1612
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1502
CalculateStackSlotUsed
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
Definition: PPCISelLowering.cpp:3948
llvm::AtomicCmpXchgInst::getNewValOperand
Value * getNewValOperand()
Definition: Instructions.h:650
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
TargetMachine.h
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:702
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
isFunctionGlobalAddress
static bool isFunctionGlobalAddress(SDValue Callee)
Definition: PPCISelLowering.cpp:5131
CalculateStackSlotAlignment
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
Definition: PPCISelLowering.cpp:3906
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:420
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
SelectionDAGNodes.h
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
Constants.h
llvm::SelectionDAG::UpdateNodeOperands
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
Definition: SelectionDAG.cpp:9598
llvm::PPCFunctionInfo::setReturnAddrSaveIndex
void setReturnAddrSaveIndex(int idx)
Definition: PPCMachineFunctionInfo.h:165
llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition: SelectionDAGNodes.h:718
llvm::PPCISD::GlobalBaseReg
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
Definition: PPCISelLowering.h:158
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:421
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::PPCTargetLowering::isFPImmLegal
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Definition: PPCISelLowering.cpp:17169
llvm::SelectionDAG::getBoolExtOrTrunc
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
Definition: SelectionDAG.cpp:1452
llvm::PPCSubtarget
Definition: PPCSubtarget.h:71
llvm::ISD::ArgFlagsTy::isByVal
bool isByVal() const
Definition: TargetCallingConv.h:85
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3945
llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition: MachineOperand.h:815
llvm::SelectionDAG::getTargetBlockAddress
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:764
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:674
llvm::PPCTargetLowering::hasSPE
bool hasSPE() const
Definition: PPCISelLowering.cpp:1608
llvm::PPCSubtarget::getTOCPointerRegister
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:273
InlinePriorityMode::Cost
@ Cost
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1456
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:8220
llvm::PPCISD::XSMINC
@ XSMINC
Definition: PPCISelLowering.h:57
llvm::PPCISD::LFIWZX
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
Definition: PPCISelLowering.h:536
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:126
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:770
llvm::PPCFrameLowering
Definition: PPCFrameLowering.h:22
llvm::SelectionDAG::getObjectPtrOffset
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
Definition: SelectionDAG.h:1005
llvm::PPC::MOF_RPlusR
@ MOF_RPlusR
Definition: PPCISelLowering.h:718
llvm::User
Definition: User.h:44
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:1057
Intr
unsigned Intr
Definition: AMDGPUBaseInfo.cpp:2664
llvm::PPC::MOF_ZExt
@ MOF_ZExt
Definition: PPCISelLowering.h:708
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
llvm::Function::getFnAttributeAsParsedInteger
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:674
llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:474
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:422
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2598
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1540
llvm::PPCISD::XXSPLTI_SP_TO_DP
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
Definition: PPCISelLowering.h:110
llvm::JumpTableSDNode
Definition: SelectionDAGNodes.h:1866
llvm::PPC::DIR_PWR6X
@ DIR_PWR6X
Definition: PPCSubtarget.h:59
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1406
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::GlobalObject
Definition: GlobalObject.h:27
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3515
StoreTailCallArgumentsToStackSlot
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
Definition: PPCISelLowering.cpp:4994
llvm::PPCTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
Definition: PPCISelLowering.cpp:11389
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
BuildVSLDOI
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
Definition: PPCISelLowering.cpp:9001
MCContext.h
CalculateTailCallSPDiff
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
Definition: PPCISelLowering.cpp:4660
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::PPCISD::VCMP_rec
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
Definition: PPCISelLowering.h:287
llvm::SelectionDAG::MaskedValueIsZero
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition: SelectionDAG.cpp:2559
llvm::PPCTargetLowering::CallFlags::CallConv
const CallingConv::ID CallConv
Definition: PPCISelLowering.h:1173
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:927
isXXBRShuffleMaskHelper
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
Definition: PPCISelLowering.cpp:2365
llvm::CodeGenOpt::Aggressive
@ Aggressive
-O3
Definition: CodeGen.h:61
llvm::PPCTargetLowering::isLegalAddressingMode
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Definition: PPCISelLowering.cpp:16477
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3933
llvm::PPCTargetLowering::CallFlags
Structure that collects some common arguments that get passed around between the functions for call l...
Definition: PPCISelLowering.h:1172
llvm::PPCTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Definition: PPCISelLowering.cpp:16879
llvm::PPCFunctionInfo::setVarArgsNumFPR
void setVarArgsNumFPR(unsigned Num)
Definition: PPCMachineFunctionInfo.h:245
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:341
PPCCCState.h
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:234
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::ISD::FMINNUM_IEEE
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:949
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:931
llvm::PPCFunctionInfo::setUsesTOCBasePtr
void setUsesTOCBasePtr()
Definition: PPCMachineFunctionInfo.h:218
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:258
llvm::PPCISD::ATOMIC_CMP_SWAP_8
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
Definition: PPCISelLowering.h:596
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::TargetLowering::CallLoweringInfo::setDebugLoc
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
Definition: TargetLowering.h:4238
llvm::PPCISD::VADD_SPLAT
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
Definition: PPCISelLowering.h:416
ANDIGlueBug
cl::opt< bool > ANDIGlueBug
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::MachinePointerInfo::getGOT
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
Definition: MachineOperand.cpp:1058
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
callIntrinsic
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
Definition: PPCISelLowering.cpp:11498
llvm::dwarf::Index
Index
Definition: Dwarf.h:550
llvm::ISD::DELETED_NODE
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2356
llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition: PPCSubtarget.h:52
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::TargetLoweringBase::MaxLoadsPerMemcmpOptSize
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3474
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:31
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::Function::getFnAttribute
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:666
llvm::PPCFunctionInfo::setHasNonRISpills
void setHasNonRISpills()
Definition: PPCMachineFunctionInfo.h:206
llvm::ISD::isUNINDEXEDLoad
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
Definition: SelectionDAGNodes.h:3080
llvm::TargetOptions::PPCGenScalarMASSEntries
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
Definition: TargetOptions.h:355
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:806
PPCFrameLowering.h
llvm::PPC::MOF_Vector
@ MOF_Vector
Definition: PPCISelLowering.h:727
llvm::PPCTargetLowering::getConstraintType
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
Definition: PPCISelLowering.cpp:16160
llvm::PPCTargetLowering::getNegatedExpression
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
Definition: PPCISelLowering.cpp:17079
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:467
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
llvm::Instruction
Definition: Instruction.h:41
llvm::PPCTargetLowering::emitMaskedAtomicRMWIntrinsic
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
Definition: PPCISelLowering.cpp:18379
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
llvm::DataLayout::getABITypeAlign
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:848
llvm::PPCSubtarget::descriptorTOCAnchorOffset
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:255
llvm::ShuffleVectorSDNode::getMask
ArrayRef< int > getMask() const
Definition: SelectionDAGNodes.h:1540
llvm::PPCTargetLowering::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
Definition: PPCISelLowering.cpp:16966
getEstimateRefinementSteps
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:13173
isSplatBV
static bool isSplatBV(SDValue Op)
Definition: PPCISelLowering.cpp:14909
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::PPCSubtarget::isPPC64
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
Definition: PPCSubtarget.cpp:182
llvm::APSInt
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
llvm::PPC::DIR_PWR5X
@ DIR_PWR5X
Definition: PPCSubtarget.h:57
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1494
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:740
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::PPC::PRED_BIT_SET
@ PRED_BIT_SET
Definition: PPCPredicates.h:57
llvm::PPCISD::SRA_ADDZE
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Definition: PPCISelLowering.h:182
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:256
llvm::PPCISD::MFVSR
@ MFVSR
Direct move from a VSX register to a GPR.
Definition: PPCISelLowering.h:222
llvm::CCValAssign::getCustomMem
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:101
llvm::TargetLoweringBase::getSDagStackGuard
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: TargetLoweringBase.cpp:1987
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:279
APFloat.h
llvm::DataLayout::getLargestLegalIntTypeSizeInBits
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:883
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:926
PPC.h
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:925
llvm::ISD::FP16_TO_FP
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:895
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1165
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1789
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:887
llvm::PPCTargetLowering::SelectAddressPCRel
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
Definition: PPCISelLowering.cpp:2935
UseAbsoluteJumpTables
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
llvm::StringRef::data
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7893
llvm::AArch64PACKey::IA
@ IA
Definition: AArch64BaseInfo.h:791
llvm::wasm::ValType
ValType
Definition: Wasm.h:424
llvm::MVT::INVALID_SIMPLE_VALUE_TYPE
@ INVALID_SIMPLE_VALUE_TYPE
Definition: MachineValueType.h:38
llvm::PPCTargetLowering::SelectAddressRegReg
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
Definition: PPCISelLowering.cpp:2652
DebugLoc.h
llvm::PPCISD::PPC32_GOT
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
Definition: PPCISelLowering.h:322
SmallPtrSet.h
llvm::PPC::isVSLDOIShuffleMask
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
Definition: PPCISelLowering.cpp:2112
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:526
llvm::PPC::MOF_DoubleWordInt
@ MOF_DoubleWordInt
Definition: PPCISelLowering.h:725
llvm::PPCISD::XXSWAPD
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
Definition: PPCISelLowering.h:437
llvm::TargetLoweringBase::MaxStoresPerMemcpy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
Definition: TargetLowering.h:3451
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
BuildIntrinsicOp
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
Definition: PPCISelLowering.cpp:8972
llvm::PPCTargetLowering::CallFlags::IsIndirect
const bool IsIndirect
Definition: PPCISelLowering.h:1177
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:203
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:71
llvm::PPCISD::ADDIS_GOT_TPREL_HA
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
Definition: PPCISelLowering.h:331
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:132
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1440
llvm::ISD::TargetGlobalAddress
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::PPCSubtarget::getInstrInfo
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
llvm::TargetLoweringBase::insertSSPDeclarations
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: TargetLoweringBase.cpp:1971
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1775
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1174
llvm::ISD::FSINCOS
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:959
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:127
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1295
llvm::CallInst::isTailCall
bool isTailCall() const
Definition: Instructions.h:1677
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::isOSAIX
bool isOSAIX() const
Tests whether the OS is AIX.
Definition: Triple.h:669
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2304
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:732
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1153
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
llvm::CallBase::getCallingConv
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1465
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:398
MCSectionXCOFF.h
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:312
isSignExtended
static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII)
Definition: PPCISelLowering.cpp:11655
llvm::PPCISD::SRL
@ SRL
These nodes represent PPC shifts.
Definition: PPCISelLowering.h:167
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:734
prepareIndirectCall
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
Definition: PPCISelLowering.cpp:5389
llvm::SelectionDAG::getEVTAlign
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
Definition: SelectionDAG.cpp:1293
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:100
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:190
convertIntToFP
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
Definition: PPCISelLowering.cpp:8376
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:120
llvm::TargetLowering::makeLibCall
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Definition: TargetLowering.cpp:144
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:528
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:87
llvm::MachineInstr::NoFPExcept
@ NoFPExcept
Definition: MachineInstr.h:110
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::InlineAsm::getKind
static unsigned getKind(unsigned Flags)
Definition: InlineAsm.h:351
llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition: PPCSubtarget.h:62
llvm::RISCVISD::DIVW
@ DIVW
Definition: RISCVISelLowering.h:71
llvm::PPCISD::MTVSRZ
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
Definition: PPCISelLowering.h:228
Type.h
llvm::PPCISD::XXPERM
@ XXPERM
Definition: PPCISelLowering.h:127
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1441
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
BranchProbability.h
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::TargetLoweringObjectFile::getFunctionEntryPointSymbol
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
Definition: TargetLoweringObjectFile.h:282
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1446
llvm::PPCTargetLowering::SelectOptimalAddrMode
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
Definition: PPCISelLowering.cpp:18178
llvm::PPCISD::LFIWAX
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
Definition: PPCISelLowering.h:531
llvm::PPCISD::STORE_VEC_BE
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:588
provablyDisjointOr
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
Definition: PPCISelLowering.cpp:2605
llvm::EVT::isExtended
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:134
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:717
llvm::MachineJumpTableInfo::EK_LabelDifference32
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
Definition: MachineJumpTableInfo.h:68
llvm::PPC::DIR_970
@ DIR_970
Definition: PPCSubtarget.h:49
llvm::PPCSubtarget::isGVIndirectSymbol
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
Definition: PPCSubtarget.cpp:172
llvm::PPCTargetLowering::getSDagStackGuard
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: PPCISelLowering.cpp:17163
llvm::PPCISD::FP_TO_SINT_IN_VSR
@ FP_TO_SINT_IN_VSR
Definition: PPCISelLowering.h:83
llvm::PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
Definition: PPCISelLowering.cpp:18403
llvm::MachineInstrBuilder::cloneMemRefs
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Definition: MachineInstrBuilder.h:213
llvm::PPCTargetLowering::getPrefLoopAlignment
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Definition: PPCISelLowering.cpp:16109
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:661
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: MachineValueType.h:366
llvm::PPCII::MO_GOT_TPREL_PCREL_FLAG
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combintaion of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:155
uint64_t
Class for arbitrary precision integers APInt is a functional replacement for common case unsigned integer type like unsigned long or uint64_t
Definition: tmp.txt:1
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:154
llvm::PPCISD::XXMFACC
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
Definition: PPCISelLowering.h:490
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::PPCISD::FCTIWZ
@ FCTIWZ
Definition: PPCISelLowering.h:74
llvm::CodeModel::Model
Model
Definition: CodeGen.h:31
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:917
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:222
llvm::TargetLoweringBase::PredictableSelectIsExpensive
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
Definition: TargetLowering.h:3492
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::PPCFunctionInfo::isLRStoreRequired
bool isLRStoreRequired() const
Definition: PPCMachineFunctionInfo.h:216
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:86
llvm::ISD::BlockAddress
@ BlockAddress
Definition: ISDOpcodes.h:84
llvm::PPC::PRED_EQ
@ PRED_EQ
Definition: PPCPredicates.h:29
llvm::TargetLowering::CallLoweringInfo::Outs
SmallVector< ISD::OutputArg, 32 > Outs
Definition: TargetLowering.h:4226
llvm::MachineFunction::getMMI
MachineModuleInfo & getMMI() const
Definition: MachineFunction.h:623
llvm::PPC::MOF_None
@ MOF_None
Definition: PPCISelLowering.h:704
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::TargetLowering::getPICJumpTableRelocBaseExpr
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
Definition: TargetLowering.cpp:469
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:476
llvm::TargetLowering::CallLoweringInfo::setLibCallee
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
Definition: TargetLowering.h:4249
llvm::PPCSubtarget::isAIXABI
bool isAIXABI() const
Definition: PPCSubtarget.h:214
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:672
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::MachineInstrBuilder::addFrameIndex
const MachineInstrBuilder & addFrameIndex(int Idx) const
Definition: MachineInstrBuilder.h:152
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2373
llvm::PPCSubtarget::isSVR4ABI
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetLoweringBase::MaxStoresPerMemcpyOptSize
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3453
llvm::PPCTargetLowering::SelectAddressRegRegOnly
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
Definition: PPCISelLowering.cpp:2897
llvm::PPCISD::CALL_NOP
@ CALL_NOP
Definition: PPCISelLowering.h:189
llvm::MachineInstrBuilder::setMIFlag
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Definition: MachineInstrBuilder.h:278
llvm::TargetLowering::softenSetCCOperands
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
Definition: TargetLowering.cpp:289
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:660
llvm::PPC::isXXBRHShuffleMask
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
Definition: PPCISelLowering.cpp:2378
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:640
llvm::PPCISD::ADD_TLS
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
Definition: PPCISelLowering.h:345
llvm::cl::opt< bool >
llvm::APFloat
Definition: APFloat.h:744
llvm::SDNode::use_begin
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Definition: SelectionDAGNodes.h:800
llvm::NVPTX::PTXLdStInstCode::V4
@ V4
Definition: NVPTX.h:125
llvm::CCValAssign::LocInfo
LocInfo
Definition: CallingConvLower.h:33
llvm::ISD::Register
@ Register
Definition: ISDOpcodes.h:74
llvm::ISD::GET_DYNAMIC_AREA_OFFSET
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1244
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:277
llvm::SDValue::getNumOperands
unsigned getNumOperands() const
Definition: SelectionDAGNodes.h:1145
llvm::PPCFrameLowering::getLinkageSize
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
Definition: PPCFrameLowering.h:165
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::PPCSubtarget::isTargetLinux
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
llvm::peekThroughBitcasts
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
Definition: SelectionDAG.cpp:11003
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::SignExtend32
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:541
llvm::PPCTargetLowering::getPICJumpTableRelocBaseExpr
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
Definition: PPCISelLowering.cpp:3210
llvm::PPCTargetLowering::getExceptionSelectorRegister
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Definition: PPCISelLowering.cpp:17035
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
llvm::PPCII::MO_TPREL_HA
@ MO_TPREL_HA
Definition: PPC.h:165
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:89
unsigned
Class for arbitrary precision integers APInt is a functional replacement for common case unsigned integer type like unsigned
Definition: tmp.txt:1
type
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference and DH registers in an instruction requiring REX prefix divb and mulb both produce results in AH If isel emits a CopyFromReg which gets turned into a movb and that can be allocated a r8b r15b To get around isel emits a CopyFromReg from AX and then right shift it down by and truncate it It s not pretty but it works We need some register allocation magic to make the hack go which would often require a callee saved register Callees usually need to keep this value live for most of their body so it doesn t add a significant burden on them We currently implement this in however this is suboptimal because it means that it would be quite awkward to implement the optimization for callers A better implementation would be to relax the LLVM IR rules for sret arguments to allow a function with an sret argument to have a non void return type
Definition: README-X86-64.txt:70
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:920
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
AIXSSPCanaryWordName
static const char AIXSSPCanaryWordName[]
Definition: PPCISelLowering.cpp:149
llvm::CCValAssign::getMem
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Definition: CallingConvLower.h:94
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:35
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:736
findConsecutiveLoad
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13414
llvm::PPCII::MO_TLSGDM_FLAG
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:140
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1896
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::PPCISD::LXSIZX
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
Definition: PPCISelLowering.h:541
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:131
llvm::SelectionDAG::MaxRecursionDepth
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:444
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:47
llvm::PPC::MOF_ScalarFloat
@ MOF_ScalarFloat
Definition: PPCISelLowering.h:726
llvm::PPCISD::CALL_RM
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
Definition: PPCISelLowering.h:207
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:470
RuntimeLibcalls.h
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:820
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1635
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:237
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:942
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1359
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
llvm::TargetLowering::verifyReturnAddressArgumentIsConstant
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:6732
llvm::PPCTargetLowering::splitValueIntoRegisterParts
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Definition: PPCISelLowering.cpp:18034
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:965
llvm::PPCISD::MFFS
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
Definition: PPCISelLowering.h:307
llvm::PPCISD::FIRST_NUMBER
@ FIRST_NUMBER
Definition: PPCISelLowering.h:49
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
llvm::TargetLowering::CallLoweringInfo::Chain
SDValue Chain
Definition: TargetLowering.h:4199
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1660
llvm::PPC::isVMRGHShuffleMask
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
Definition: PPCISelLowering.cpp:1993
llvm::PPCTargetLowering::LowerAsmOperandForConstraint
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
Definition: PPCISelLowering.cpp:16383
llvm::ISD::AssertZext
@ AssertZext
Definition: ISDOpcodes.h:62
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:198
llvm::PPC::MOF_SExt
@ MOF_SExt
Definition: PPCISelLowering.h:707
llvm::PPCISD::CALL_NOTOC_RM
@ CALL_NOTOC_RM
Definition: PPCISelLowering.h:209
llvm::TargetLoweringBase::NegatibleCost::Expensive
@ Expensive
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1132
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:39
llvm::TargetLowering::getJumpTableEncoding
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Definition: TargetLowering.cpp:441
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::PPCSubtarget::is32BitELFABI
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
llvm::PPCSubtarget::needsSwapsForVSXMemOps
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:202
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
prepareDescriptorIndirectCall
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5400
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:31
llvm::TargetRegisterInfo::getMatchingSuperReg
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
Definition: TargetRegisterInfo.h:600
llvm::DenseMap
Definition: DenseMap.h:714
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:795
MCSymbolXCOFF.h
llvm::TargetLowering::CallLoweringInfo::CallConv
CallingConv::ID CallConv
Definition: TargetLowering.h:4220
isAlternatingShuffMask
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
Definition: PPCISelLowering.cpp:14895
llvm::ISD::OutputArg
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
Definition: TargetCallingConv.h:233
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::TargetLoweringBase::setStackPointerRegisterToSaveRestore
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
Definition: TargetLowering.h:2322
PPCInstrInfo.h
llvm::GlobalValue::hasComdat
bool hasComdat() const
Definition: GlobalValue.h:237
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:921
llvm::TargetLowering::CW_Register
@ CW_Register
Definition: TargetLowering.h:4640
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:932
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1800
llvm::PPC::AddrMode
AddrMode
Definition: PPCISelLowering.h:738
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:9202
llvm::InlineAsm::Op_FirstOperand
@ Op_FirstOperand
Definition: InlineAsm.h:222
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition: TargetLowering.h:3944
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:484
llvm::TargetLoweringBase::setPrefFunctionAlignment
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
Definition: TargetLowering.h:2525
llvm::ISD::FP_TO_FP16
@ FP_TO_FP16
Definition: ISDOpcodes.h:896
combineBVOfConsecutiveLoads
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
Definition: PPCISelLowering.cpp:14237
llvm::DenormalMode
Represent subnormal handling kind for floating point instruction inputs and outputs.
Definition: FloatingPointMode.h:69
llvm::InlineAsm::Kind_RegUse
@ Kind_RegUse
Definition: InlineAsm.h:240
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
llvm::PPCTargetLowering::getRegisterByName
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
Definition: PPCISelLowering.cpp:16590
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1172
MCRegisterInfo.h
llvm::TargetLowering::AsmOperandInfo
This contains information for each constraint that we are lowering.
Definition: TargetLowering.h:4647
size
i< reg-> size
Definition: README.txt:166
llvm::ISD::UADDSAT
@ UADDSAT
Definition: ISDOpcodes.h:341
llvm::TargetLowering::CallLoweringInfo::DL
SDLoc DL
Definition: TargetLowering.h:4224
llvm::PPCTargetLowering::enableAggressiveFMAFusion
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Definition: PPCISelLowering.cpp:1799
llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2363
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:742
llvm::PPCFunctionInfo::getVarArgsNumFPR
unsigned getVarArgsNumFPR() const
Definition: PPCMachineFunctionInfo.h:244
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:446
llvm::SDNode::dump
void dump() const
Dump this node, for debugging.
Definition: SelectionDAGDumper.cpp:554
llvm::SelectionDAG::getSExtOrTrunc
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition: SelectionDAG.cpp:1440
ArrayRef.h
llvm::TargetLoweringBase::NegatibleCost
NegatibleCost
Enum that specifies when a float negation is beneficial.
Definition: TargetLowering.h:279
llvm::PPCISD::SRA
@ SRA
Definition: PPCISelLowering.h:168
llvm::SelectionDAG::getAnyExtOrTrunc
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
Definition: SelectionDAG.cpp:1434
DisableInnermostLoopAlign32
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
llvm::PPCSubtarget::isPredictableSelectIsExpensive
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:285
llvm::GlobalAddressSDNode::getOffset
int64_t getOffset() const
Definition: SelectionDAGNodes.h:1776
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:170
llvm::TargetLoweringBase::hasBigEndianPartOrdering
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
Definition: TargetLowering.h:1651
llvm::APInt::getBoolValue
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:459
llvm::PPC::DIR_440
@ DIR_440
Definition: PPCSubtarget.h:43
llvm::PPCISD::Hi
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Definition: PPCISelLowering.h:137
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:130
llvm::PPC::PRED_NE
@ PRED_NE
Definition: PPCPredicates.h:32
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:911
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:582
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2376
llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition: MachineFrameInfo.h:639
llvm::PPCII::MO_PIC_FLAG
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:110
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1414
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:179
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:119
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1438
llvm::APFloatBase::PPCDoubleDouble
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:251
llvm::PPCISD::PROBED_ALLOCA
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
Definition: PPCISelLowering.h:155
llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition: TargetLowering.h:4198
llvm::PPCFunctionInfo::setVarArgsStackOffset
void setVarArgsStackOffset(int Offset)
Definition: PPCMachineFunctionInfo.h:228
IRBuilder.h
llvm::PPCISD::EXTRACT_SPE
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
Definition: PPCISelLowering.h:240
llvm::ISD::ArgFlagsTy::isNest
bool isNest() const
Definition: TargetCallingConv.h:118
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition: SelectionDAG.cpp:1964
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
llvm::PPCISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
Definition: PPCISelLowering.h:275
DisableSCO
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
llvm::ISD::ADJUST_TRAMPOLINE
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1129
llvm::ISD::MULHS
@ MULHS
Definition: ISDOpcodes.h:638
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition: MachineFrameInfo.cpp:83
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:925
llvm::TargetLoweringBase::ArgListEntry
Definition: TargetLowering.h:293
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:688
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1448
llvm::CCValAssign::getCustomReg
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:89
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1601
llvm::X86AS::FS
@ FS
Definition: X86.h:201
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:265
llvm::ISD::STACKMAP
@ STACKMAP
Definition: ISDOpcodes.h:1297
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:202
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1206
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:62
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1434
llvm::PPCISD::BUILD_FP128
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
Definition: PPCISelLowering.h:231
llvm::ISD::EH_DWARF_CFA
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9946
InlinePriorityMode::ML
@ ML
llvm::PPCTargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Definition: PPCISelLowering.cpp:1791
MachineModuleInfo.h
llvm::PPCII::MO_TLS
@ MO_TLS
Definition: PPC.h:174
llvm::ISD::TargetGlobalTLSAddress
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition: SelectionDAG.cpp:2250
llvm::ISD::RETURNADDR
@ RETURNADDR
Definition: ISDOpcodes.h:95
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:3056
llvm::PPCISD::EXTRACT_VSX_REG
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
Definition: PPCISelLowering.h:487
combineBVOfVecSExt
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:14374
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
isConsecutiveLSLoc
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13297
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition: MachineInstrBuilder.h:123
llvm::RISCVISD::SRAW
@ SRAW
Definition: RISCVISelLowering.h:66
llvm::PPCTargetLowering::getExceptionPointerRegister
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
Definition: PPCISelLowering.cpp:17030
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::PPCISD::DYNAREAOFFSET
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
Definition: PPCISelLowering.h:151
llvm::MDNode
Metadata node.
Definition: Metadata.h:943
RA
SI optimize exec mask operations pre RA
Definition: SIOptimizeExecMaskingPreRA.cpp:71
llvm::TargetLowering::CW_Memory
@ CW_Memory
Definition: TargetLowering.h:4641
R6
#define R6(n)
llvm::MVT::v256i1
@ v256i1
Definition: MachineValueType.h:74
llvm::CodeGenOpt::None
@ None
-O0
Definition: CodeGen.h:58
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2397
llvm::PPCISD::XSMAXC
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
Definition: PPCISelLowering.h:56
llvm::SelectionDAG::CreateStackTemporary
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
Definition: SelectionDAG.cpp:2368
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:750
llvm::ISD::VASTART
@ VASTART
Definition: ISDOpcodes.h:1086
llvm::PPCTargetLowering::isOffsetFoldingLegal
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition: PPCISelLowering.cpp:16636
llvm::PPC::isVMRGEOShuffleMask
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
Definition: PPCISelLowering.cpp:2083
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1332
llvm::MachinePointerInfo::getWithOffset
MachinePointerInfo getWithOffset(int64_t O) const
Definition: MachineMemOperand.h:79
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
info
lazy value info
Definition: LazyValueInfo.cpp:58
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::SelectionDAG::getTokenFactor
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
Definition: SelectionDAG.cpp:12145
llvm::TargetLowering::CW_Default
@ CW_Default
Definition: TargetLowering.h:4643
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:258
llvm::ISD::ArgFlagsTy::setByValSize
void setByValSize(unsigned S)
Definition: TargetCallingConv.h:173
llvm::SelectionDAG::getCALLSEQ_END
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:1034
llvm::isAcquireOrStronger
bool isAcquireOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:128
llvm::BranchProbability::getOne
static BranchProbability getOne()
Definition: BranchProbability.h:50
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:694
llvm::Type::FP128TyID
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
llvm::isIntS16Immediate
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
Definition: PPCISelLowering.cpp:2586
llvm::TargetLowering::CallLoweringInfo::Ins
SmallVector< ISD::InputArg, 32 > Ins
Definition: TargetLowering.h:4228
llvm::ISD::ConstantPool
@ ConstantPool
Definition: ISDOpcodes.h:82
llvm::RetCC_PPC
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
TargetOptions.h
llvm::ISD::GlobalTLSAddress
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
llvm::CCState::isVarArg
bool isVarArg() const
Definition: CallingConvLower.h:237
llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition: CallingConvLower.h:117
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1323
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:499
llvm::BlockAddress
The address of a basic block.
Definition: Constants.h:875
llvm::ISD::TargetConstantPool
@ TargetConstantPool
Definition: ISDOpcodes.h:168
llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition: MachineInstrBuilder.h:197
llvm::TargetLowering::CallLoweringInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:4223
llvm::BuildVectorSDNode::isConstant
bool isConstant() const
Definition: SelectionDAG.cpp:12030
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::MVT::fixedlen_vector_valuetypes
static auto fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1542
llvm::PPCISD::SCALAR_TO_VECTOR_PERMUTED
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
Definition: PPCISelLowering.h:258
llvm::ISD::isSEXTLoad
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
Definition: SelectionDAGNodes.h:3068
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::PPCISD::STRICT_FCFIDU
@ STRICT_FCFIDU
Definition: PPCISelLowering.h:500
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
llvm::PPCTargetLowering::emitEHSjLjLongJmp
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:12067
llvm::ISD::isNormalLoad
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Definition: SelectionDAGNodes.h:3049
llvm::ISD::UMAX
@ UMAX
Definition: ISDOpcodes.h:663
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1383
llvm::ConstantPoolSDNode
Definition: SelectionDAGNodes.h:1887
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:1196
llvm::BlockAddressSDNode::getBlockAddress
const BlockAddress * getBlockAddress() const
Definition: SelectionDAGNodes.h:2212
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::CCState::AllocateReg
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
Definition: CallingConvLower.h:328
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:429
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
llvm::BuildVectorSDNode::isConstantSplat
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
Definition: SelectionDAG.cpp:11740
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:25
llvm::SDNode::use_end
static use_iterator use_end()
Definition: SelectionDAGNodes.h:804
DataLayout.h
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:110
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
isLoad
static bool isLoad(int Opcode)
Definition: ARCInstrInfo.cpp:53
llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition: TargetLowering.cpp:9440
llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition: TargetLowering.cpp:5232
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:352
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:1037
llvm::BuildVectorSDNode
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Definition: SelectionDAGNodes.h:1992
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:417
llvm::PPCISD::VCMP
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
Definition: PPCISelLowering.h:281
llvm::ShuffleVectorSDNode::getMaskElt
int getMaskElt(unsigned Idx) const
Definition: SelectionDAGNodes.h:1545
llvm::logicalview::LVAttributeKind::Zero
@ Zero
llvm::Offset
@ Offset
Definition: DWP.cpp:406
llvm::isReleaseOrStronger
bool isReleaseOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:132
llvm::Sched::Hybrid
@ Hybrid
Definition: TargetLowering.h:102
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ISD::STRICT_SINT_TO_FP
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:448
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
llvm::PPCISD::STRICT_FADDRTZ
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
Definition: PPCISelLowering.h:505
llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG
bool isAfterLegalizeDAG() const
Definition: TargetLowering.h:3946
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:933
llvm::PPCISD::STRICT_FCFIDS
@ STRICT_FCFIDS
Definition: PPCISelLowering.h:501
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:10541
llvm::ISD::OutputArg::Flags
ArgFlagsTy Flags
Definition: TargetCallingConv.h:234
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:168
llvm::PPCTargetLowering::SelectAddressEVXRegReg
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
Definition: PPCISelLowering.cpp:2617
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2596
PPCPerfectShuffle.h
llvm::TargetRegisterInfo::isTypeLegalForClass
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
Definition: TargetRegisterInfo.h:296
llvm::ISD::STRICT_FSUB
@ STRICT_FSUB
Definition: ISDOpcodes.h:401
uint32_t
llvm::StackOffset
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:36
Compiler.h
llvm::TargetLoweringBase::IsStrictFPEnabled
bool IsStrictFPEnabled
Definition: TargetLowering.h:3507
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
llvm::TargetLoweringBase::MaxStoresPerMemmoveOptSize
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3488
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1149
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
llvm::PPCTargetLowering::SelectAddressRegImm34
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
Definition: PPCISelLowering.cpp:2848
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::PPCTargetLowering::CallFlags::IsTailCall
const bool IsTailCall
Definition: PPCISelLowering.h:1174
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetLowering::getCheaperNegatedExpression
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
Definition: TargetLowering.h:4129
llvm::TargetLowering::isGAPlusOffset
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
Definition: TargetLowering.cpp:5129
llvm::PPCISD::FCFIDUS
@ FCFIDUS
Definition: PPCISelLowering.h:68
combineADDToADDZE
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:17282
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantSDNode::getSExtValue
int64_t getSExtValue() const
Definition: SelectionDAGNodes.h:1602
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:921
llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition: SelectionDAGNodes.h:1185
llvm::PICLevel::Level
Level
Definition: CodeGen.h:36
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::PPC::DIR_PWR4
@ DIR_PWR4
Definition: PPCSubtarget.h:55
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition: TargetLowering.cpp:5315
isNByteElemShuffleMask
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
Definition: PPCISelLowering.cpp:2201
llvm::TargetMachine::shouldAssumeDSOLocal
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
Definition: TargetMachine.cpp:88
hasSameArgumentList
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
Definition: PPCISelLowering.cpp:4793
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2412
isValidSplatLoad
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
Definition: PPCISelLowering.cpp:9138
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:190
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:112
llvm::SDNode::ops
ArrayRef< SDUse > ops() const
Definition: SelectionDAGNodes.h:930
llvm::PPCFunctionInfo::getVarArgsStackOffset
int getVarArgsStackOffset() const
Definition: PPCMachineFunctionInfo.h:227
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:922
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:441
llvm::ISD::STRICT_FP_EXTEND
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:469
llvm::TargetLoweringBase::setMinStackArgumentAlignment
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
Definition: TargetLowering.h:2538
isScalarToVec
static SDValue isScalarToVec(SDValue Op)
Definition: PPCISelLowering.cpp:14928
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:270
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::PPCISD::PPC32_PICGOT
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
Definition: PPCISelLowering.h:326
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:177
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::PPCISD::LXVRZX
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
Definition: PPCISelLowering.h:557
LoadOps
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
Definition: AggressiveInstCombine.cpp:611
llvm::SelectionDAG::getTargetConstantPool
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:733
setUsesTOCBasePtr
static void setUsesTOCBasePtr(MachineFunction &MF)
Definition: PPCISelLowering.cpp:3114
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::MachineMemOperand::MOVolatile
@ MOVolatile
The memory access is volatile.
Definition: MachineMemOperand.h:138
llvm::MVT::v512i1
@ v512i1
Definition: MachineValueType.h:75
llvm::TargetLowering::CallLoweringInfo::setIsPostTypeLegalization
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
Definition: TargetLowering.h:4348
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:655
llvm::MVT::v1i128
@ v1i128
Definition: MachineValueType.h:141
generateEquivalentSub
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
Definition: PPCISelLowering.cpp:13480
llvm::SignExtend64
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:557
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1414
llvm::PPCISD::LD_VSX_LH
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
Definition: PPCISelLowering.h:566
llvm::GlobalValue::isStrongDefinitionForLinker
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:627
llvm::PPC::getSplatIdxForPPCMnemonics
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
Definition: PPCISelLowering.cpp:2458
llvm::PPCTargetLowering::isJumpTableRelative
bool isJumpTableRelative() const override
Definition: PPCISelLowering.cpp:3186
LowerMemOpCallTo
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
Definition: PPCISelLowering.cpp:5083
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:824
llvm::PPCISD::PADDI_DTPREL
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
Definition: PPCISelLowering.h:410
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1714
llvm::ComplexDeinterleavingOperation::Shuffle
@ Shuffle
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1502
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:718
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::PPCTargetLowering::EmitAtomicBinary
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
Definition: PPCISelLowering.cpp:11536
llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition: TargetLowering.h:4622
llvm::PPC::AM_XForm
@ AM_XForm
Definition: PPCISelLowering.h:744
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::RetCC_PPC_Cold
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:668
llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition: TargetLowering.h:323
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:644
llvm::PPCTargetLowering::getScratchRegisters
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
Definition: PPCISelLowering.cpp:17018
llvm::PPCISD::LXVD2X
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:551
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2367
llvm::PPCISD::ADDIS_TLSGD_HA
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
Definition: PPCISelLowering.h:350
llvm::PPCISD::CALL_NOP_RM
@ CALL_NOP_RM
Definition: PPCISelLowering.h:208
llvm::SelectionDAG::getTargetJumpTable
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:727
llvm::PPCISD::ADDI_DTPREL_L
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
Definition: PPCISelLowering.h:406
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:973
llvm::ISD::FRAMEADDR
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
llvm::AtomicSDNode
This is an SDNode representing atomic operations.
Definition: SelectionDAGNodes.h:1444
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::PPCTargetLowering::getStackProbeSize
unsigned getStackProbeSize(const MachineFunction &MF) const
Definition: PPCISelLowering.cpp:12176
llvm::PPC::isVPKUDUMShuffleMask
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
Definition: PPCISelLowering.cpp:1901
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:912
llvm::APInt::bitsToDouble
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1668
llvm::PPCISD::FCTIWUZ
@ FCTIWUZ
Definition: PPCISelLowering.h:79
llvm::PPCTargetLowering::isFPExtFree
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
Definition: PPCISelLowering.cpp:16862
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
CallingConv.h
llvm::PPCISD::ADDI_TLSLD_L_ADDR
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
Definition: PPCISelLowering.h:396
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:358
getLabelAccessInfo
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
Definition: PPCISelLowering.cpp:3082
llvm::TargetLowering::CallLoweringInfo::IsTailCall
bool IsTailCall
Definition: TargetLowering.h:4214
llvm::PPCTargetLowering::CollectTargetIntrinsicOperands
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
Definition: PPCISelLowering.cpp:16457
llvm::SDNode::isOnlyUserOf
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
Definition: SelectionDAG.cpp:11228
j
return j(j<< 16)
llvm::EVT::getHalfNumVectorElementsVT
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:420
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1457
llvm::PPCISD::FSEL
@ FSEL
FSEL - Traditional three-operand fsel node.
Definition: PPCISelLowering.h:53
llvm::DataLayout::getIntPtrType
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:861
llvm::PPCISD::RET_FLAG
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
Definition: PPCISelLowering.h:214
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:10927
llvm::PPCISD::TOC_ENTRY
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
Definition: PPCISelLowering.h:607
Constant.h
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:124
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2597
llvm::CCState::getFirstUnallocated
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
Definition: CallingConvLower.h:313
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:121
llvm::PPCTargetLowering::CallFlags::IsVarArg
const bool IsVarArg
Definition: PPCISelLowering.h:1175
setXFormForUnalignedFI
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
Definition: PPCISelLowering.cpp:18166
llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:221
llvm::PPC::isXXBRQShuffleMask
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
Definition: PPCISelLowering.cpp:2390
llvm::TargetLoweringBase::IntrinsicInfo
Definition: TargetLowering.h:1051
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::ISD::STRICT_FMUL
@ STRICT_FMUL
Definition: ISDOpcodes.h:402
PPCCallingConv.h
llvm::PPC::DIR_PWR6
@ DIR_PWR6
Definition: PPCSubtarget.h:58
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:405
llvm::ISD::ArgFlagsTy::getByValSize
unsigned getByValSize() const
Definition: TargetCallingConv.h:169
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:943
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:242
llvm::PPCFunctionInfo::FixedType
@ FixedType
Definition: PPCMachineFunctionInfo.h:27
getMaxByValAlign
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
Definition: PPCISelLowering.cpp:1565
llvm::PPCISD::STRICT_FCTIWUZ
@ STRICT_FCTIWUZ
Definition: PPCISelLowering.h:496
llvm::PPC::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
Definition: PPCFastISel.cpp:2466
llvm::PPCTargetLowering::SelectAddressRegImm
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
Definition: PPCISelLowering.cpp:2743
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2958
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2390
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1763
llvm::KnownBits
Definition: KnownBits.h:23
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:638
EnableQuadwordAtomics
static cl::opt< bool > EnableQuadwordAtomics("ppc-quadword-atomics", cl::desc("enable quadword lock-free atomic operations"), cl::init(false), cl::Hidden)
llvm::PPCISD::BCTRL_LOAD_TOC_RM
@ BCTRL_LOAD_TOC_RM
Definition: PPCISelLowering.h:211
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:572
llvm::SDNode::getNumOperands
unsigned getNumOperands() const
Return the number of values used by this operation.
Definition: SelectionDAGNodes.h:908
llvm::AIXCCState::isFixed
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition: TargetLowering.h:250
llvm::ISD::isEXTLoad
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
Definition: SelectionDAGNodes.h:3062
llvm::PPCSubtarget::useSoftFloat
bool useSoftFloat() const
Definition: PPCSubtarget.h:174
uint16_t
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2599
llvm::PPCISD::STBRX
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
Definition: PPCISelLowering.h:516
CallingConvLower.h
rotate
The same transformation can work with an even modulo with the addition of a rotate
Definition: README.txt:680
llvm::TargetLowering::CallLoweringInfo::setZExtResult
CallLoweringInfo & setZExtResult(bool Value=true)
Definition: TargetLowering.h:4333
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:10917
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:295
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:357
llvm::PPCTargetLowering::useLoadStackGuardNode
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
Definition: PPCISelLowering.cpp:17145
llvm::ISD::BR
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:981
llvm::ISD::TargetExternalSymbol
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
getPPCStrictOpcode
static unsigned getPPCStrictOpcode(unsigned Opc)
Definition: PPCISelLowering.cpp:8031
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::object::BCTR
@ BCTR
Definition: ELF.h:92
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:915
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:550
llvm::MachineMemOperand::getSize
uint64_t getSize() const
Return the size in bytes of the memory reference.
Definition: MachineMemOperand.h:235
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:923
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1349
llvm::TargetLowering::ConstraintWeight
ConstraintWeight
Definition: TargetLowering.h:4630
llvm::bit_floor
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:291
llvm::PPCISD::FCTIDUZ
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
Definition: PPCISelLowering.h:78
getSToVPermuted
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:14965
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:914
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:468
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:101
llvm::PPCISD::ADDIS_DTPREL_HA
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
Definition: PPCISelLowering.h:401
ISDOpcodes.h
Success
#define Success
Definition: AArch64Disassembler.cpp:300
isBLACompatibleAddress
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
Definition: PPCISelLowering.cpp:4965
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:738
Enabled
static bool Enabled
Definition: Statistic.cpp:46
llvm::PPCISD::ANDI_rec_1_EQ_BIT
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
Definition: PPCISelLowering.h:264
llvm::APInt::isNegatedPowerOf2
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:441
llvm::AIXCCState
Definition: PPCCCState.h:41
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::ISD::INLINEASM_BR
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1027
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
Casting.h
llvm::PPC::DIR_PWR7
@ DIR_PWR7
Definition: PPCSubtarget.h:60
llvm::PPCFunctionInfo::getVarArgsNumGPR
unsigned getVarArgsNumGPR() const
Definition: PPCMachineFunctionInfo.h:230
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:425
llvm::ISD::ArgFlagsTy::isSExt
bool isSExt() const
Definition: TargetCallingConv.h:76
llvm::PPCTargetLowering::BuildSDIVPow2
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Definition: PPCISelLowering.cpp:16021
Function.h
llvm::CCState::AllocateStack
unsigned AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
Definition: CallingConvLower.h:402
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:201
llvm::PPCFrameLowering::getFramePointerSaveOffset
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
Definition: PPCFrameLowering.cpp:2702
llvm::PPCTargetLowering::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Definition: PPCISelLowering.cpp:16875
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetLowering::DAGCombinerInfo::AddToWorklist
void AddToWorklist(SDNode *N)
Definition: DAGCombiner.cpp:977
llvm::PPC::MOF_SubWordInt
@ MOF_SubWordInt
Definition: PPCISelLowering.h:723
llvm::SelectionDAG::getTargetExternalSymbol
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1915
llvm::SelectionDAG::getMCSymbol
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
Definition: SelectionDAG.cpp:1906
llvm::PPCISD::SWAP_NO_CHAIN
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
Definition: PPCISelLowering.h:441
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::CCState::getNextStackOffset
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Definition: CallingConvLower.h:241
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1444
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:234
llvm::PPCISD::NodeType
NodeType
Definition: PPCISelLowering.h:47
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:145
usePartialVectorLoads
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
Definition: PPCISelLowering.cpp:2950
llvm::PPC::isVPKUWUMShuffleMask
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
Definition: PPCISelLowering.cpp:1864
llvm::PPCFunctionInfo::VectorFloat
@ VectorFloat
Definition: PPCMachineFunctionInfo.h:33
llvm::SDNode::getNumValues
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
Definition: SelectionDAGNodes.h:983
llvm::TargetLoweringBase::shouldExpandAtomicCmpXchgInIR
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: TargetLowering.h:2151
llvm::TLSModel::Model
Model
Definition: CodeGen.h:45
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
llvm::TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic
@ MaskedIntrinsic
llvm::PPCISD::CALL
@ CALL
CALL - A direct function call.
Definition: PPCISelLowering.h:188
llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition: TargetLowering.h:233
StringSwitch.h
llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition: SelectionDAGNodes.h:379
llvm::PPCISD::CR6UNSET
@ CR6UNSET
Definition: PPCISelLowering.h:318
llvm::PPCTargetLowering::insertSSPDeclarations
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: PPCISelLowering.cpp:17153
llvm::TargetLoweringBase::getSchedulingPreference
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
Definition: TargetLowering.h:878
llvm::PPCISD::BCTRL_LOAD_TOC
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
Definition: PPCISelLowering.h:203
llvm::TargetLowering::CW_Invalid
@ CW_Invalid
Definition: TargetLowering.h:4632
transformCallee
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5295
llvm::PPC::MOF_RPlusSImm16Mult4
@ MOF_RPlusSImm16Mult4
Definition: PPCISelLowering.h:715
llvm::PPCISD::BCTRL_RM
@ BCTRL_RM
Definition: PPCISelLowering.h:210
combineADDToMAT_PCREL_ADDR
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:17368
llvm::TargetLoweringBase::setJumpIsExpensive
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
Definition: TargetLoweringBase.cpp:951
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:186
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
llvm::PPCII::MO_HA
@ MO_HA
Definition: PPC.h:162
llvm::PPCISD::LOAD_VEC_BE
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:562
PPCISelLowering.h
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:26
llvm::TargetLoweringBase::setLibcallName
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
Definition: TargetLowering.h:3168
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:966
llvm::ISD::VACOPY
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1081
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1399
llvm::TargetRegisterInfo::getNoPreservedMask
virtual const uint32_t * getNoPreservedMask() const
Return a register mask that clobbers everything.
Definition: TargetRegisterInfo.h:496
llvm::TargetLowering::CallLoweringInfo::NoMerge
bool NoMerge
Definition: TargetLowering.h:4210
llvm::Function::arg_begin
arg_iterator arg_begin()
Definition: Function.h:766
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
llvm::PPC::PRED_UN
@ PRED_UN
Definition: PPCPredicates.h:33
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:751
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:449
llvm::PPCISD::ADDIS_TLSLD_HA
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
Definition: PPCISelLowering.h:380
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1177
llvm::TargetLoweringBase::setHasMultipleConditionRegisters
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
Definition: TargetLowering.h:2331
llvm::codeview::ModifierOptions::Const
@ Const
llvm::AtomicRMWInst::UIncWrap
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:770
llvm::PPC::MOF_WordInt
@ MOF_WordInt
Definition: PPCISelLowering.h:724
llvm::PPCISD::ADDI_TLSGD_L
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
Definition: PPCISelLowering.h:356
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:426
llvm::checkConvertToNonDenormSingle
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
Definition: PPCISelLowering.cpp:9127
llvm::APInt::abs
APInt abs() const
Get the absolute value.
Definition: APInt.h:1734
llvm::PPCTargetLowering::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: PPCISelLowering.cpp:16826
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::PPC::DIR_E5500
@ DIR_E5500
Definition: PPCSubtarget.h:53
llvm::FPOpFusion::Fast
@ Fast
Definition: TargetOptions.h:37
CodeGen.h
llvm::PPCISD::EXTSWSLI
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
Definition: PPCISelLowering.h:176
callsShareTOCBase
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
Definition: PPCISelLowering.cpp:4677
getCanonicalConstSplat
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
Definition: PPCISelLowering.cpp:8950
llvm::PPCISD::TLSGD_AIX
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
Definition: PPCISelLowering.h:375
llvm::TLSModel::InitialExec
@ InitialExec
Definition: CodeGen.h:48
getBaseWithConstantOffset
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t &Offset, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13285
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:31
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::MachineFunction::getPICBaseSymbol
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
Definition: MachineFunction.cpp:734
llvm::PPC::MOF_RPlusSImm16
@ MOF_RPlusSImm16
Definition: PPCISelLowering.h:713
llvm::SDNode::hasNUsesOfValue
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
Definition: SelectionDAG.cpp:11199
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1293
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::PPCISD::MFOCRF
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
Definition: PPCISelLowering.h:219
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2290
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1439
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:928
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:870
llvm::PPC::isVMRGLShuffleMask
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
Definition: PPCISelLowering.cpp:1968
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:175
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:106
llvm::PPCISD::VECINSERT
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
Definition: PPCISelLowering.h:118
getOutputChainFromCallSeq
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
Definition: PPCISelLowering.cpp:5373
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:217
llvm::LSBaseSDNode
Base class for LoadSDNode and StoreSDNode.
Definition: SelectionDAGNodes.h:2315
llvm::PPCISD::CALL_NOTOC
@ CALL_NOTOC
Definition: PPCISelLowering.h:190
Instructions.h
llvm::PPCSubtarget::isUsingPCRelativeCalls
bool isUsingPCRelativeCalls() const
Definition: PPCSubtarget.cpp:184
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5364
llvm::PPCTargetLowering::emitTrailingFence
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Definition: PPCISelLowering.cpp:11516
llvm::CC_PPC32_SVR4
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::MVT::f128
@ f128
Definition: MachineValueType.h:60
combineBVZEXTLOAD
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:14472
llvm::ISD::PREFETCH
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1145
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
llvm::GlobalAlias::getAliaseeObject
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:562
truncateScalarIntegerArg
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
Definition: PPCISelLowering.cpp:6849
llvm::ISD::READCYCLECOUNTER
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1112
PrepareTailCall
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
Definition: PPCISelLowering.cpp:5107
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::PPCISD::SINT_VEC_TO_FP
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
Definition: PPCISelLowering.h:246
SmallVector.h
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:394
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1049
llvm::PPC::AM_PCRel
@ AM_PCRel
Definition: PPCISelLowering.h:745
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:309
llvm::PPCISD::LD_SPLAT
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
Definition: PPCISelLowering.h:570
MachineInstrBuilder.h
isGPRShadowAligned
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
Definition: PPCISelLowering.cpp:6561
addShuffleForVecExtend
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
Definition: PPCISelLowering.cpp:14335
llvm::PPCFunctionInfo::LongFloatingPoint
@ LongFloatingPoint
Definition: PPCMachineFunctionInfo.h:29
llvm::MCSymbolXCOFF
Definition: MCSymbolXCOFF.h:19
llvm::InlineAsm::getNumOperandRegisters
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition: InlineAsm.h:363
llvm::PPCISD::SHL
@ SHL
Definition: PPCISelLowering.h:169
llvm::PPCISD::VEXTS
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
Definition: PPCISelLowering.h:87
llvm::ISD::isUnsignedIntSetCC
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1473
llvm::ISD::ArgFlagsTy::getNonZeroByValAlign
Align getNonZeroByValAlign() const
Definition: TargetCallingConv.h:153
llvm::PPC::DIR_PWR5
@ DIR_PWR5
Definition: PPCSubtarget.h:56
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:241
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::PPCFunctionInfo::VectorShort
@ VectorShort
Definition: PPCMachineFunctionInfo.h:31
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:199
llvm::ISD::ZERO_EXTEND_VECTOR_INREG
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:814
llvm::PPCSubtarget::getEnvironmentPointerRegister
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:267
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:118
llvm::PPCSubtarget::descriptorEnvironmentPointerOffset
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:261
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:56
llvm::TargetLowering::useLoadStackGuardNode
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Definition: TargetLowering.h:5183
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1285
llvm::MCSectionXCOFF::getQualNameSymbol
MCSymbolXCOFF * getQualNameSymbol() const
Definition: MCSectionXCOFF.h:110
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:704
llvm::TargetLoweringBase::setMaxAtomicSizeInBitsSupported
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Definition: TargetLowering.h:2546
CreateCopyOfByValArgument
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
Definition: PPCISelLowering.cpp:5072
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::PPCTargetLowering::decomposeMulByConstant
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
Definition: PPCISelLowering.cpp:16917
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:90
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::PPCTargetLowering::SelectForceXFormMode
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Definition: PPCISelLowering.cpp:18000
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::PPCTargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: PPCISelLowering.cpp:12360
isFloatingPointZero
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is 0.0 or -0.0.
Definition: PPCISelLowering.cpp:1809
llvm::PPCTargetLowering::isZExtFree
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Definition: PPCISelLowering.cpp:16842
llvm::PPCISD::PAIR_BUILD
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
Definition: PPCISelLowering.h:481
llvm::SelectionDAG::getRegisterMask
SDValue getRegisterMask(const uint32_t *RegMask)
Definition: SelectionDAG.cpp:2162
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::CCValAssign::getReg
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Definition: CallingConvLower.h:82
SelectTypeKind::FP
@ FP
llvm::PPCTargetLowering::useSoftFloat
bool useSoftFloat() const override
Definition: PPCISelLowering.cpp:1604
llvm::TargetLowering::DAGCombinerInfo::CombineTo
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
Definition: DAGCombiner.cpp:982
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2473
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:701
llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition: TargetLowering.h:891
llvm::PPCISD::FRE
@ FRE
Reciprocal estimate instructions (unary FP ops).
Definition: PPCISelLowering.h:90
PPCMachineFunctionInfo.h
llvm::ISD::TargetJumpTable
@ TargetJumpTable
Definition: ISDOpcodes.h:167
llvm::ISD::STRICT_FADD
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:292
llvm::PPCISD::ST_VSR_SCAL_INT
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
Definition: PPCISelLowering.h:591
llvm::PPCTargetLowering::shouldExpandAtomicCmpXchgInIR
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: PPCISelLowering.cpp:18350
llvm::PPCISD::XXPERMDI
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
Definition: PPCISelLowering.h:126
PPC
should just be implemented with a CLZ instruction Since there are other e PPC
Definition: README.txt:709
isPCRelNode
static bool isPCRelNode(SDValue N)
Definition: PPCISelLowering.cpp:17873
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:216
llvm::MVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
Definition: MachineValueType.h:386
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2595
llvm::PPCFunctionInfo::getFramePointerSaveIndex
int getFramePointerSaveIndex() const
Definition: PPCMachineFunctionInfo.h:161
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: TargetLowering.h:2828
llvm::KnownBits::isConstant
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
llvm::ISD::isZEXTLoad
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
Definition: SelectionDAGNodes.h:3074
GeneratePerfectShuffle
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Definition: PPCISelLowering.cpp:9477
llvm::ISD::UMIN
@ UMIN
Definition: ISDOpcodes.h:662
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
widenVec
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
Definition: PPCISelLowering.cpp:8422
llvm::PPC::MOF_NoExt
@ MOF_NoExt
Definition: PPCISelLowering.h:709
MachineMemOperand.h
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:484
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:637
llvm::TargetOptions::UnsafeFPMath
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Definition: TargetOptions.h:163
llvm::PPCISD::GET_TLS_ADDR
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
Definition: PPCISelLowering.h:361
llvm::TargetLowering::CallLoweringInfo::OutVals
SmallVector< SDValue, 32 > OutVals
Definition: TargetLowering.h:4227
MachineOperand.h
RegName
#define RegName(no)
llvm::PPCISD::BCTRL
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
Definition: PPCISelLowering.h:198
llvm::PPCTargetLowering::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Definition: PPCISelLowering.cpp:17061
llvm::PPCFunctionInfo::ShortFloatingPoint
@ ShortFloatingPoint
Definition: PPCMachineFunctionInfo.h:28
isFPExtLoad
static bool isFPExtLoad(SDValue Op)
Definition: PPCISelLowering.cpp:14136
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:930
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:641
llvm::MachineBasicBlock::transferSuccessorsAndUpdatePHIs
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
Definition: MachineBasicBlock.cpp:911
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1137
llvm::StringSwitch::Default
R Default(T Value)
Definition: StringSwitch.h:182
llvm::PPCFunctionInfo::setLRStoreRequired
void setLRStoreRequired()
Definition: PPCMachineFunctionInfo.h:215
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1184
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:671
llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >
DerivedTypes.h
invertFMAOpcode
static unsigned invertFMAOpcode(unsigned Opc)
Definition: PPCISelLowering.cpp:17068
llvm::PPC::MOF_AddrIsSImm32
@ MOF_AddrIsSImm32
Definition: PPCISelLowering.h:720
llvm::PPCFunctionInfo::setFramePointerSaveIndex
void setFramePointerSaveIndex(int Idx)
Definition: PPCMachineFunctionInfo.h:162
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
isTOCSaveRestoreRequired
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5229
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1442
llvm::PPCSubtarget::POPCNTD_Fast
@ POPCNTD_Fast
Definition: PPCSubtarget.h:76
llvm::TargetLowering::CallLoweringInfo::Callee
SDValue Callee
Definition: TargetLowering.h:4221
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:256
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:289
llvm::PPCFunctionInfo::setVarArgsFrameIndex
void setVarArgsFrameIndex(int Index)
Definition: PPCMachineFunctionInfo.h:225
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1485
getRegClassForSVT
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
Definition: PPCISelLowering.cpp:6820
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:910
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:465
llvm::SelectionDAG::ComputeNumSignBits
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
Definition: SelectionDAG.cpp:4009
llvm::omp::RTLDependInfoFields::Flags
@ Flags
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::PPCISD::CMPB
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
Definition: PPCISelLowering.h:130
llvm::LoadInst::isUnordered
bool isUnordered() const
Definition: Instructions.h:258
llvm::PPCISD::GET_TLSLD_ADDR
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
Definition: PPCISelLowering.h:391
llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition: TargetMachine.h:233
llvm::PPCISD::ATOMIC_CMP_SWAP_16
@ ATOMIC_CMP_SWAP_16
Definition: PPCISelLowering.h:597
llvm::PPCSubtarget::getCPUDirective
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
llvm::ISD::GET_ROUNDING
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:862
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::ISD::VAARG
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1076
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::SelectionDAG::getExternalSymbol
SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition: SelectionDAG.cpp:1898
llvm::PPCISD::FP_TO_UINT_IN_VSR
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
Definition: PPCISelLowering.h:82
llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
llvm::TargetLoweringBase::MaxLoadsPerMemcmp
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
Definition: TargetLowering.h:3472
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::SelectionDAG::getMDNode
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
Definition: SelectionDAG.cpp:2235
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:372
llvm::Sched::Preference
Preference
Definition: TargetLowering.h:98
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::ISD::isNormalStore
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Definition: SelectionDAGNodes.h:3087
llvm::PPCII::MO_TPREL_FLAG
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set the symbol reference is relative to TLS Initial Exec model.
Definition: PPC.h:132
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::NVPTX::VecShuffle
@ VecShuffle
Definition: NVPTX.h:89
llvm::cl::desc
Definition: CommandLine.h:411
llvm::InlineAsm::Kind_Clobber
@ Kind_Clobber
Definition: InlineAsm.h:243
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::MVT::fp_valuetypes
static auto fp_valuetypes()
Definition: MachineValueType.h:1531
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1500
llvm::M1
unsigned M1(unsigned Val)
Definition: VE.h:468
llvm::PPC::get_VSPLTI_elt
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
Definition: PPCISelLowering.cpp:2478
llvm::PPCTargetLowering::getRegForInlineAsmConstraint
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Definition: PPCISelLowering.cpp:16250
needStackSlotPassParameters
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
Definition: PPCISelLowering.cpp:4757
llvm::PPC::MOF_SubtargetBeforeP9
@ MOF_SubtargetBeforeP9
Definition: PPCISelLowering.h:731
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
raw_ostream.h
llvm::PPCII::MO_TPREL_LO
@ MO_TPREL_LO
Definition: PPC.h:164
llvm::PPCISD::STRICT_FCFID
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
Definition: PPCISelLowering.h:499
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:250
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:88
llvm::PPCISD::FTSQRT
@ FTSQRT
Test instruction for software square root.
Definition: PPCISelLowering.h:94
llvm::TargetLoweringBase::shouldExpandBuildVectorWithShuffles
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
Definition: TargetLowering.h:506
llvm::PPCTargetLowering::getTargetNodeName
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
Definition: PPCISelLowering.cpp:1616
isVMerge
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
Definition: PPCISelLowering.cpp:1944
MachineFunction.h
llvm::CallingConv::Cold
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:924
setAlignFlagsForFI
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
Definition: PPCISelLowering.cpp:17791
buildCallOperands
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5493
convertFPToInt
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:8054
PPCPredicates.h
llvm::PPCISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
Definition: PPCISelLowering.h:272
llvm::TargetLoweringBase::AtomicExpansionKind::CmpXChg
@ CmpXChg
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:98
llvm::SDNode::getFlags
SDNodeFlags getFlags() const
Definition: SelectionDAGNodes.h:972
llvm::PPCTargetLowering::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
Definition: PPCISelLowering.cpp:16871
Value.h
llvm::SelectionDAG::getStackArgumentTokenFactor
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
Definition: SelectionDAG.cpp:6836
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:523
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:132
MCExpr.h
llvm::TargetLowering::getSqrtInputTest
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
Definition: TargetLowering.cpp:6742
llvm::PPCTargetLowering::isFMAFasterThanFMulAndFAdd
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
Definition: PPCISelLowering.cpp:16944
llvm::ISD::STACKSAVE
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1052
llvm::TargetLoweringBase::MaxStoresPerMemmove
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Definition: TargetLowering.h:3486
llvm::PPCTargetLowering::shouldInlineQuadwordAtomics
bool shouldInlineQuadwordAtomics() const
Definition: PPCISelLowering.cpp:18324
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:918
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:44
llvm::PPCTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: PPCISelLowering.cpp:15266
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:8135
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::SelectionDAG::makeEquivalentMemoryOrdering
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
Definition: SelectionDAG.cpp:10864
llvm::codegen::getCodeModel
CodeModel::Model getCodeModel()
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1062
llvm::TargetLoweringBase::setPrefLoopAlignment
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
Definition: TargetLowering.h:2532
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::ISD::ROTR
@ ROTR
Definition: ISDOpcodes.h:695
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition: MachineMemOperand.h:219
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:513
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:469
TargetRegisterInfo.h
llvm::PPCFunctionInfo::getReturnAddrSaveIndex
int getReturnAddrSaveIndex() const
Definition: PPCMachineFunctionInfo.h:164
llvm::PPCISD::TLS_LOCAL_EXEC_MAT_ADDR
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
Definition: PPCISelLowering.h:475
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Debug.h
llvm::PPCFunctionInfo::setMinReservedArea
void setMinReservedArea(unsigned size)
Definition: PPCMachineFunctionInfo.h:181
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:139
llvm::PPC::DIR_PWR10
@ DIR_PWR10
Definition: PPCSubtarget.h:63
llvm::SystemZISD::TBEGIN
@ TBEGIN
Definition: SystemZISelLowering.h:153
llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition: MachineInstrBuilder.h:63
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1161
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:482
llvm::PPCISD::ADDI_TLSGD_L_ADDR
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
Definition: PPCISelLowering.h:366
isConsecutiveLS
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13337
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:311
isValidPCRelNode
static bool isValidPCRelNode(SDValue N)
Definition: PPCISelLowering.cpp:2927
PPCTargetMachine.h
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1361
llvm::CCValAssign::needsCustom
bool needsCustom() const
Definition: CallingConvLower.h:124
llvm::PPCISD::STRICT_FCTIWZ
@ STRICT_FCTIWZ
Definition: PPCISelLowering.h:494
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
llvm::SrcOp
Definition: MachineIRBuilder.h:128
llvm::PPCISD::COND_BRANCH
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
Definition: PPCISelLowering.h:294
llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition: TargetLowering.cpp:5170
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:365
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
llvm::PPCTargetLowering::emitProbedAlloca
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:12198
llvm::Module::getNamedValue
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
Definition: Module.cpp:110
llvm::PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
Definition: PPCISelLowering.h:470
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:1167
llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
llvm::TargetLoweringBase::LibCall
@ LibCall
Definition: TargetLowering.h:200
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7882
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::InlineAsm::Kind_Mem
@ Kind_Mem
Definition: InlineAsm.h:245
llvm::PPCISD::FRSQRTE
@ FRSQRTE
Definition: PPCISelLowering.h:91
llvm::PPCTargetLowering::hasInlineStackProbe
bool hasInlineStackProbe(const MachineFunction &MF) const override
Definition: PPCISelLowering.cpp:12168
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::ISD::EXTRACT_ELEMENT
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:163
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
getNormalLoadInput
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
Definition: PPCISelLowering.cpp:9083
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::PPCISD::VECSHL
@ VECSHL
VECSHL - The PPC vector shift left instruction.
Definition: PPCISelLowering.h:122
llvm::ISD::isBuildVectorAllZeros
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition: SelectionDAG.cpp:266
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::PPCISD::FCFIDS
@ FCFIDS
Definition: PPCISelLowering.h:67
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition: MachineValueType.h:1246
llvm::TLSModel::LocalExec
@ LocalExec
Definition: CodeGen.h:49
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::PPCSubtarget::isELFv2ABI
bool isELFv2ABI() const
Definition: PPCSubtarget.cpp:181
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:722
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1925
TargetLoweringObjectFileImpl.h
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:393
llvm::LLT
Definition: LowLevelTypeImpl.h:39
llvm::SelectionDAG::areNonVolatileConsecutiveLoads
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
Definition: SelectionDAG.cpp:11557